Diffstat (limited to 'lib/CodeGen')
281 files changed, 21606 insertions, 9958 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 632ea8e9cdc4..444f618d8b8c 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -1,9 +1,8 @@ //===- AggressiveAntiDepBreaker.cpp - Anti-dep breaker --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 5dce3c2499e5..0cf2e6d78f7f 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -1,9 +1,8 @@ //==- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index 37dcb0be824e..c99800659bfd 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index 467bcc2edc6f..9247dd844936 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 797f05ee5cf3..d158e70b86ac 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -1,9 +1,8 @@ //===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -83,6 +82,7 @@ unsigned llvm::ComputeLinearIndex(Type *Ty, /// void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl<EVT> &ValueVTs, + SmallVectorImpl<EVT> *MemVTs, SmallVectorImpl<uint64_t> *Offsets, uint64_t StartingOffset) { // Given a struct type, recursively traverse the elements. @@ -92,7 +92,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, EI = EB, EE = STy->element_end(); EI != EE; ++EI) - ComputeValueVTs(TLI, DL, *EI, ValueVTs, Offsets, + ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets, StartingOffset + SL->getElementOffset(EI - EB)); return; } @@ -101,7 +101,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *EltTy = ATy->getElementType(); uint64_t EltSize = DL.getTypeAllocSize(EltTy); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) - ComputeValueVTs(TLI, DL, EltTy, ValueVTs, Offsets, + ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets, StartingOffset + i * EltSize); return; } @@ -110,10 +110,50 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, return; // Base case: we can get an EVT for this LLVM IR type. ValueVTs.push_back(TLI.getValueType(DL, Ty)); + if (MemVTs) + MemVTs->push_back(TLI.getMemValueType(DL, Ty)); if (Offsets) Offsets->push_back(StartingOffset); } +void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl<EVT> &ValueVTs, + SmallVectorImpl<uint64_t> *Offsets, + uint64_t StartingOffset) { + return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets, + StartingOffset); +} + +void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty, + SmallVectorImpl<LLT> &ValueTys, + SmallVectorImpl<uint64_t> *Offsets, + uint64_t StartingOffset) { + // Given a struct type, recursively traverse the elements. + if (StructType *STy = dyn_cast<StructType>(&Ty)) { + const StructLayout *SL = DL.getStructLayout(STy); + for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) + computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets, + StartingOffset + SL->getElementOffset(I)); + return; + } + // Given an array type, recursively traverse the elements. + if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) { + Type *EltTy = ATy->getElementType(); + uint64_t EltSize = DL.getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + computeValueLLTs(DL, *EltTy, ValueTys, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty.isVoidTy()) + return; + // Base case: we can get an LLT for this LLVM IR type. + ValueTys.push_back(getLLTForType(Ty, DL)); + if (Offsets != nullptr) + Offsets->push_back(StartingOffset * 8); +} + /// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. 
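The ComputeValueVTs change above threads an optional MemVTs output through the existing recursion, and the new computeValueLLTs mirrors the same walk for GlobalISel (note it reports offsets in bits, hence StartingOffset * 8). Both rely on one decomposition: structs recurse per element with the struct layout's element offset added in, arrays recurse per element at i * element-alloc-size, and a scalar is the base case. A minimal standalone sketch of that traversal, using a toy type model in place of llvm::Type and DataLayout (all names here are illustrative, not LLVM's):

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Toy stand-ins for llvm::Type: a scalar with an alloc size, a struct with
// precomputed field offsets, or an array of N elements.
struct ToyType {
  enum Kind { Scalar, Struct, Array } Kind;
  uint64_t Size = 0;                    // Scalar: alloc size in bytes.
  std::vector<const ToyType *> Fields;  // Struct members.
  std::vector<uint64_t> FieldOffsets;   // Struct layout offsets.
  const ToyType *Elt = nullptr;         // Array element type.
  uint64_t NumElts = 0;                 // Array length.
};

// Mirrors the shape of ComputeValueVTs/computeValueLLTs: flatten an
// aggregate into (leaf type, byte offset) pairs, depth first.
static void
flattenValueTypes(const ToyType &Ty, uint64_t StartingOffset,
                  std::vector<std::pair<const ToyType *, uint64_t>> &Out) {
  if (Ty.Kind == ToyType::Struct) {
    for (size_t I = 0; I != Ty.Fields.size(); ++I)
      flattenValueTypes(*Ty.Fields[I], StartingOffset + Ty.FieldOffsets[I],
                        Out);
    return;
  }
  if (Ty.Kind == ToyType::Array) {
    for (uint64_t I = 0; I != Ty.NumElts; ++I)
      flattenValueTypes(*Ty.Elt, StartingOffset + I * Ty.Elt->Size, Out);
    return;
  }
  Out.push_back({&Ty, StartingOffset}); // Base case: one leaf value.
}

int main() {
  ToyType I32{ToyType::Scalar, 4, {}, {}, nullptr, 0};
  ToyType Arr{ToyType::Array, 0, {}, {}, &I32, 2};
  // struct { i32; [2 x i32]; } with natural layout offsets 0 and 4.
  ToyType S{ToyType::Struct, 0, {&I32, &Arr}, {0, 4}, nullptr, 0};
  std::vector<std::pair<const ToyType *, uint64_t>> Leaves;
  flattenValueTypes(S, 0, Leaves);
  for (auto &L : Leaves)
    std::cout << "leaf of size " << L.first->Size << " at offset "
              << L.second << "\n";
  // Prints offsets 0, 4, 8 -- matching StartingOffset + getElementOffset
  // and StartingOffset + i * EltSize in the real code.
}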
GlobalValue *llvm::ExtractTypeInfo(Value *V) { V = V->stripPointerCasts(); diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index d93716287981..b11148595136 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -1,9 +1,8 @@ //===- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 9011f025f595..f6ef85a5b78f 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -1,9 +1,8 @@ //===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AsmPrinter/AccelTable.cpp b/lib/CodeGen/AsmPrinter/AccelTable.cpp index 95875ccb8a0b..b1b7921ea976 100644 --- a/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/AsmPrinter/AccelTable.cpp - Accelerator Tables --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -56,10 +55,10 @@ void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) { // Create the individual hash data outputs. for (auto &E : Entries) { // Unique the entries. - std::stable_sort(E.second.Values.begin(), E.second.Values.end(), - [](const AccelTableData *A, const AccelTableData *B) { - return *A < *B; - }); + llvm::stable_sort(E.second.Values, + [](const AccelTableData *A, const AccelTableData *B) { + return *A < *B; + }); E.second.Values.erase( std::unique(E.second.Values.begin(), E.second.Values.end()), E.second.Values.end()); @@ -82,10 +81,9 @@ void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) { // Sort the contents of the buckets by hash value so that hash collisions end // up together. Stable sort makes testing easier and doesn't cost much more. 
for (auto &Bucket : Buckets) - std::stable_sort(Bucket.begin(), Bucket.end(), - [](HashData *LHS, HashData *RHS) { - return LHS->HashValue < RHS->HashValue; - }); + llvm::stable_sort(Bucket, [](HashData *LHS, HashData *RHS) { + return LHS->HashValue < RHS->HashValue; + }); } namespace { @@ -557,8 +555,8 @@ void llvm::emitDWARF5AccelTable( SmallVector<unsigned, 1> CUIndex(CUs.size()); int Count = 0; for (const auto &CU : enumerate(CUs)) { - if (CU.value()->getCUNode()->getNameTableKind() == - DICompileUnit::DebugNameTableKind::None) + if (CU.value()->getCUNode()->getNameTableKind() != + DICompileUnit::DebugNameTableKind::Default) continue; CUIndex[CU.index()] = Count++; assert(CU.index() == CU.value()->getUniqueID()); @@ -616,30 +614,10 @@ void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const { Asm->emitInt32(QualifiedNameHash); } -#ifndef _MSC_VER -// The lines below are rejected by older versions (TBD) of MSVC. constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[]; constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[]; constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[]; constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[]; -#else -// FIXME: Erase this path once the minimum MSCV version has been bumped. -const SmallVector<AppleAccelTableData::Atom, 4> - AppleAccelTableOffsetData::Atoms = { - Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; -const SmallVector<AppleAccelTableData::Atom, 4> AppleAccelTableTypeData::Atoms = - {Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), - Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), - Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; -const SmallVector<AppleAccelTableData::Atom, 4> - AppleAccelTableStaticOffsetData::Atoms = { - Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; -const SmallVector<AppleAccelTableData::Atom, 4> - AppleAccelTableStaticTypeData::Atoms = { - Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), - Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), - Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)}; -#endif #ifndef NDEBUG void AppleAccelTableWriter::Header::print(raw_ostream &OS) const { diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp index 042243b79259..f11c7de5ed8a 100644 --- a/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
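The std::stable_sort to llvm::stable_sort migrations in this diff are mechanical: llvm::stable_sort (from llvm/ADT/STLExtras.h) is a range-taking wrapper, so callers pass the container once instead of a begin/end pair. A minimal sketch of the same idea in portable C++, with an illustrative name so as not to claim the exact LLVM signature:

#include <algorithm>
#include <iostream>
#include <iterator>
#include <utility>
#include <vector>

// Range-based stable sort in the spirit of llvm::stable_sort.
template <typename Range, typename Compare>
void stable_sort_range(Range &&R, Compare C) {
  std::stable_sort(std::begin(R), std::end(R), C);
}

int main() {
  std::vector<std::pair<int, char>> Bucket{{2, 'a'}, {1, 'b'}, {2, 'c'}};
  stable_sort_range(Bucket, [](const auto &L, const auto &R) {
    return L.first < R.first; // Sort by hash; stability keeps 'a' before 'c'.
  });
  for (auto &B : Bucket)
    std::cout << B.first << B.second << ' '; // 1b 2a 2c
}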
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -24,21 +23,24 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { return IterBool.first->second.Number; } - -void AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) { +MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) { static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize(); - uint64_t Length = sizeof(uint16_t) // version - + sizeof(uint8_t) // address_size - + sizeof(uint8_t) // segment_selector_size - + AddrSize * Pool.size(); // entries + StringRef Prefix = "debug_addr_"; + MCSymbol *BeginLabel = Asm.createTempSymbol(Prefix + "start"); + MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end"); + Asm.OutStreamer->AddComment("Length of contribution"); - Asm.emitInt32(Length); // TODO: Support DWARF64 format. + Asm.EmitLabelDifference(EndLabel, BeginLabel, + 4); // TODO: Support DWARF64 format. + Asm.OutStreamer->EmitLabel(BeginLabel); Asm.OutStreamer->AddComment("DWARF version number"); Asm.emitInt16(Asm.getDwarfVersion()); Asm.OutStreamer->AddComment("Address size"); Asm.emitInt8(AddrSize); Asm.OutStreamer->AddComment("Segment selector size"); Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size. + + return EndLabel; } // Emit addresses into the section given. @@ -49,8 +51,10 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { // Start the dwarf addr section. Asm.OutStreamer->SwitchSection(AddrSection); + MCSymbol *EndLabel = nullptr; + if (Asm.getDwarfVersion() >= 5) - emitHeader(Asm, AddrSection); + EndLabel = emitHeader(Asm, AddrSection); // Define the symbol that marks the start of the contribution. // It is referenced via DW_AT_addr_base. @@ -67,4 +71,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { for (const MCExpr *Entry : Entries) Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize()); + + if (EndLabel) + Asm.OutStreamer->EmitLabel(EndLabel); } diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h index 2209c7eb50ed..f92cf72093ca 100644 --- a/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/lib/CodeGen/AsmPrinter/AddressPool.h @@ -1,9 +1,8 @@ //===- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -55,7 +54,7 @@ public: void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; } private: - void emitHeader(AsmPrinter &Asm, MCSection *Section); + MCSymbol *emitHeader(AsmPrinter &Asm, MCSection *Section); /// Symbol designates the start of the contribution to the address table. 
MCSymbol *AddressTableBaseSym = nullptr; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 7070451e3330..54f6cc2d5571 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1,9 +1,8 @@ //===- AsmPrinter.cpp - Common AsmPrinter code ----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -35,7 +34,6 @@ #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/CodeGen/AsmPrinterHandler.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" @@ -60,7 +58,6 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" @@ -80,6 +77,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/RemarkStreamer.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCAsmInfo.h" @@ -101,6 +99,9 @@ #include "llvm/MC/MCValue.h" #include "llvm/MC/SectionKind.h" #include "llvm/Pass.h" +#include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkFormat.h" +#include "llvm/Remarks/RemarkStringTable.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -143,9 +144,10 @@ static const char *const CodeViewLineTablesGroupDescription = STATISTIC(EmittedInsts, "Number of machine instrs printed"); -static cl::opt<bool> - PrintSchedule("print-schedule", cl::Hidden, cl::init(false), - cl::desc("Print 'sched: [latency:throughput]' in .s output")); +static cl::opt<bool> EnableRemarksSection( + "remarks-section", + cl::desc("Emit a section containing remark diagnostics metadata"), + cl::init(false)); char AsmPrinter::ID = 0; @@ -232,6 +234,12 @@ void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { S.EmitInstruction(Inst, getSubtargetInfo()); } +void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) { + assert(DD && "Dwarf debug file is not defined."); + assert(OutStreamer->hasRawTextSupport() && "Expected assembly output mode."); + (void)DD->emitInitialLocDirective(MF, /*CUID=*/0); +} + /// getCurrentSection() - Return the current section we are emitting to. const MCSection *AsmPrinter::getCurrentSection() const { return OutStreamer->getCurrentSectionOnly(); @@ -252,6 +260,9 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); + const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) + .getModuleMetadata(M); + OutStreamer->InitSections(false); // Emit the version-min deployment target directive if needed. 
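The AddressPool change above stops precomputing the .debug_addr contribution length and instead brackets the header with begin/end temp symbols, emitting EmitLabelDifference(EndLabel, BeginLabel, 4) and letting the assembler resolve the size. The DWARF v5 header it produces is: a 4-byte unit length that excludes the length field itself, a 2-byte version, a 1-byte address size, and a 1-byte segment selector size, followed by the pooled addresses. A hedged standalone sketch that builds those bytes and backpatches the length the way the label difference effectively does:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

static void writeLE(std::vector<uint8_t> &Buf, uint64_t V, unsigned Size) {
  for (unsigned I = 0; I != Size; ++I)
    Buf.push_back(uint8_t(V >> (8 * I)));
}

int main() {
  const unsigned AddrSize = 8;
  std::vector<uint64_t> Pool{0x1000, 0x2000, 0x3000}; // Illustrative entries.

  std::vector<uint8_t> Section;
  writeLE(Section, 0, 4);        // Unit length placeholder ("BeginLabel").
  size_t Begin = Section.size(); // Length counts from after this field.
  writeLE(Section, 5, 2);        // DWARF version number.
  writeLE(Section, AddrSize, 1); // Address size.
  writeLE(Section, 0, 1);        // Segment selector size.
  for (uint64_t A : Pool)        // The address table proper.
    writeLE(Section, A, AddrSize);

  // "EmitLabelDifference(EndLabel, BeginLabel, 4)": patch the length now
  // that the end is known; the assembler does this resolution in the real
  // code. (memcpy of the u32 assumes a little-endian host.)
  uint32_t Length = uint32_t(Section.size() - Begin);
  std::memcpy(Section.data(), &Length, 4);

  std::cout << "unit length = " << Length << " bytes\n"; // 4 + 3*8 = 28.
}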
@@ -300,16 +311,17 @@ bool AsmPrinter::doInitialization(Module &M) { if (MAI->doesSupportDebugInformation()) { bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); if (EmitCodeView && TM.getTargetTriple().isOSWindows()) { - Handlers.push_back(HandlerInfo(new CodeViewDebug(this), - DbgTimerName, DbgTimerDescription, - CodeViewLineTablesGroupName, - CodeViewLineTablesGroupDescription)); + Handlers.emplace_back(llvm::make_unique<CodeViewDebug>(this), + DbgTimerName, DbgTimerDescription, + CodeViewLineTablesGroupName, + CodeViewLineTablesGroupDescription); } if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) { DD = new DwarfDebug(this, &M); DD->beginModule(); - Handlers.push_back(HandlerInfo(DD, DbgTimerName, DbgTimerDescription, - DWARFGroupName, DWARFGroupDescription)); + Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName, + DbgTimerDescription, DWARFGroupName, + DWARFGroupDescription); } } @@ -362,14 +374,15 @@ bool AsmPrinter::doInitialization(Module &M) { break; } if (ES) - Handlers.push_back(HandlerInfo(ES, EHTimerName, EHTimerDescription, - DWARFGroupName, DWARFGroupDescription)); + Handlers.emplace_back(std::unique_ptr<EHStreamer>(ES), EHTimerName, + EHTimerDescription, DWARFGroupName, + DWARFGroupDescription); if (mdconst::extract_or_null<ConstantInt>( MMI->getModule()->getModuleFlag("cfguardtable"))) - Handlers.push_back(HandlerInfo(new WinCFGuard(this), CFGuardName, - CFGuardDescription, DWARFGroupName, - DWARFGroupDescription)); + Handlers.emplace_back(llvm::make_unique<WinCFGuard>(this), CFGuardName, + CFGuardDescription, DWARFGroupName, + DWARFGroupDescription); return false; } @@ -483,7 +496,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); const DataLayout &DL = GV->getParent()->getDataLayout(); - uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); + uint64_t Size = DL.getTypeAllocSize(GV->getValueType()); // If the alignment is specified, we *must* obey it. Overaligning a global // with a specified alignment is a prompt way to break globals emitted to @@ -658,6 +671,9 @@ void AsmPrinter::EmitFunctionHeader() { if (MAI->hasDotTypeDotSizeDirective()) OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); + if (F.hasFnAttribute(Attribute::Cold)) + OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_Cold); + if (isVerbose()) { F.printAsOperand(OutStreamer->GetCommentOS(), /*PrintType=*/false, F.getParent()); @@ -738,74 +754,30 @@ void AsmPrinter::EmitFunctionEntryLabel() { } /// emitComments - Pretty-print comments for instructions. -/// It returns true iff the sched comment was emitted. -/// Otherwise it returns false. -static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS, - AsmPrinter *AP) { +static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getMF(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); // Check for spills and reloads - int FI; - - const MachineFrameInfo &MFI = MF->getFrameInfo(); - bool Commented = false; - - auto getSize = - [&MFI](const SmallVectorImpl<const MachineMemOperand *> &Accesses) { - unsigned Size = 0; - for (auto A : Accesses) - if (MFI.isSpillSlotObjectIndex( - cast<FixedStackPseudoSourceValue>(A->getPseudoValue()) - ->getFrameIndex())) - Size += A->getSize(); - return Size; - }; // We assume a single instruction only has a spill or reload, not // both. 
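The Handlers changes above replace push_back(HandlerInfo(new CodeViewDebug(this), ...)) plus a manual delete in doFinalization with emplace_back over a unique_ptr, so handler lifetime is owned by the vector (the diff uses llvm::make_unique, the project's pre-C++14 polyfill). A reduced sketch of that ownership pattern, with HandlerInfo's fields abbreviated:

#include <iostream>
#include <memory>
#include <vector>

struct Handler { // Stand-in for AsmPrinterHandler.
  virtual ~Handler() = default;
  virtual void endModule() = 0;
};
struct DebugHandler : Handler {
  void endModule() override { std::cout << "debug handler finalized\n"; }
};

struct HandlerInfo { // Abbreviated: the real struct also carries timer names.
  std::unique_ptr<Handler> H;
  const char *TimerName;
};

int main() {
  std::vector<HandlerInfo> Handlers;
  // Before: push_back(HandlerInfo(new DebugHandler(), ...)) and a matching
  // `delete HI.Handler` at end of module. After: ownership lives in the
  // vector, so clear() is enough.
  Handlers.emplace_back(HandlerInfo{std::make_unique<DebugHandler>(), "dbg"});
  for (auto &HI : Handlers)
    HI.H->endModule();
  Handlers.clear(); // Destroys the handlers; no manual delete.
}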
- const MachineMemOperand *MMO; - SmallVector<const MachineMemOperand *, 2> Accesses; - if (TII->isLoadFromStackSlotPostFE(MI, FI)) { - if (MFI.isSpillSlotObjectIndex(FI)) { - MMO = *MI.memoperands_begin(); - CommentOS << MMO->getSize() << "-byte Reload"; - Commented = true; - } - } else if (TII->hasLoadFromStackSlot(MI, Accesses)) { - if (auto Size = getSize(Accesses)) { - CommentOS << Size << "-byte Folded Reload"; - Commented = true; - } - } else if (TII->isStoreToStackSlotPostFE(MI, FI)) { - if (MFI.isSpillSlotObjectIndex(FI)) { - MMO = *MI.memoperands_begin(); - CommentOS << MMO->getSize() << "-byte Spill"; - Commented = true; - } - } else if (TII->hasStoreToStackSlot(MI, Accesses)) { - if (auto Size = getSize(Accesses)) { - CommentOS << Size << "-byte Folded Spill"; - Commented = true; - } + Optional<unsigned> Size; + if ((Size = MI.getRestoreSize(TII))) { + CommentOS << *Size << "-byte Reload\n"; + } else if ((Size = MI.getFoldedRestoreSize(TII))) { + if (*Size) + CommentOS << *Size << "-byte Folded Reload\n"; + } else if ((Size = MI.getSpillSize(TII))) { + CommentOS << *Size << "-byte Spill\n"; + } else if ((Size = MI.getFoldedSpillSize(TII))) { + if (*Size) + CommentOS << *Size << "-byte Folded Spill\n"; } // Check for spill-induced copies - if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) { - Commented = true; - CommentOS << " Reload Reuse"; - } - - if (Commented) { - if (AP->EnablePrintSchedInfo) { - // If any comment was added above and we need sched info comment then add - // this new comment just after the above comment w/o "\n" between them. - CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n"; - return true; - } - CommentOS << "\n"; - } - return false; + if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) + CommentOS << " Reload Reuse\n"; } /// emitImplicitDef - This method emits the specified machine instruction @@ -1093,10 +1065,8 @@ void AsmPrinter::EmitFunctionBody() { } } - if (isVerbose() && emitComments(MI, OutStreamer->GetCommentOS(), this)) { - MachineInstr *MIP = const_cast<MachineInstr *>(&MI); - MIP->setAsmPrinterFlag(MachineInstr::NoSchedComment); - } + if (isVerbose()) + emitComments(MI, OutStreamer->GetCommentOS()); switch (MI.getOpcode()) { case TargetOpcode::CFI_INSTRUCTION: @@ -1105,11 +1075,13 @@ void AsmPrinter::EmitFunctionBody() { case TargetOpcode::LOCAL_ESCAPE: emitFrameAlloc(MI); break; + case TargetOpcode::ANNOTATION_LABEL: case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol()); break; case TargetOpcode::INLINEASM: + case TargetOpcode::INLINEASM_BR: EmitInlineAsm(&MI); break; case TargetOpcode::DBG_VALUE: @@ -1266,7 +1238,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV, // GlobalVariable or Function, i.e., as GlobalValue. if (!GV->hasGlobalUnnamedAddr() || !GV->hasInitializer() || !GV->isConstant() || !GV->isDiscardableIfUnused() || - !dyn_cast<GlobalValue>(GV->getOperand(0))) + !isa<GlobalValue>(GV->getOperand(0))) return false; // To be a got equivalent, at least one of its users need to be a constant @@ -1329,9 +1301,19 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, else assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage"); + bool IsFunction = GIS.getValueType()->isFunctionTy(); + + // Treat bitcasts of functions as functions also. This is important at least + // on WebAssembly where object and function addresses can't alias each other. 
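The emitComments rewrite above drops the local frame-index bookkeeping and leans on the MachineInstr spill/reload queries (getRestoreSize, getFoldedRestoreSize, getSpillSize, getFoldedSpillSize), each returning Optional<unsigned>, so the control flow reduces to a first-match chain. A sketch of that chain with std::optional standing in for llvm::Optional and stubbed queries:

#include <iostream>
#include <optional>
#include <string>

// Stand-ins for the MachineInstr queries; return values are illustrative.
std::optional<unsigned> getRestoreSize()       { return std::nullopt; }
std::optional<unsigned> getFoldedRestoreSize() { return 16; }
std::optional<unsigned> getSpillSize()         { return std::nullopt; }
std::optional<unsigned> getFoldedSpillSize()   { return std::nullopt; }

int main() {
  std::string Comment;
  std::optional<unsigned> Size;
  // The first matching category wins; folded cases may report 0 and then
  // say nothing, exactly as in the new emitComments.
  if ((Size = getRestoreSize()))
    Comment = std::to_string(*Size) + "-byte Reload";
  else if ((Size = getFoldedRestoreSize())) {
    if (*Size) Comment = std::to_string(*Size) + "-byte Folded Reload";
  } else if ((Size = getSpillSize()))
    Comment = std::to_string(*Size) + "-byte Spill";
  else if ((Size = getFoldedSpillSize())) {
    if (*Size) Comment = std::to_string(*Size) + "-byte Folded Spill";
  }
  std::cout << (Comment.empty() ? "<no comment>" : Comment) << "\n";
}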
+ if (!IsFunction) + if (auto *CE = dyn_cast<ConstantExpr>(GIS.getIndirectSymbol())) + if (CE->getOpcode() == Instruction::BitCast) + IsFunction = + CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy(); + // Set the symbol type to function if the alias has a function type. // This affects codegen when the aliasee is not a function. - if (GIS.getType()->getPointerElementType()->isFunctionTy()) { + if (IsFunction) { OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction); if (isa<GlobalIFunc>(GIS)) OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction); @@ -1363,6 +1345,66 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, } } +void AsmPrinter::emitRemarksSection(Module &M) { + RemarkStreamer *RS = M.getContext().getRemarkStreamer(); + if (!RS) + return; + const remarks::Serializer &Serializer = RS->getSerializer(); + + // Switch to the right section: .remarks/__remarks. + MCSection *RemarksSection = + OutContext.getObjectFileInfo()->getRemarksSection(); + OutStreamer->SwitchSection(RemarksSection); + + // Emit the magic number. + OutStreamer->EmitBytes(remarks::Magic); + // Explicitly emit a '\0'. + OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1); + + // Emit the version number: little-endian uint64_t. + // The version number is located at the offset 0x0 in the section. + std::array<char, 8> Version; + support::endian::write64le(Version.data(), remarks::Version); + OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size())); + + // Emit the string table in the section. + // Note: we need to use the streamer here to emit it in the section. We can't + // just use the serialize function with a raw_ostream because of the way + // MCStreamers work. + uint64_t StrTabSize = + Serializer.StrTab ? Serializer.StrTab->SerializedSize : 0; + // Emit the total size of the string table (the size itself excluded): + // little-endian uint64_t. + // The total size is located after the version number. + // Note: even if no string table is used, emit 0. + std::array<char, 8> StrTabSizeBuf; + support::endian::write64le(StrTabSizeBuf.data(), StrTabSize); + OutStreamer->EmitBinaryData( + StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size())); + + if (const Optional<remarks::StringTable> &StrTab = Serializer.StrTab) { + std::vector<StringRef> StrTabStrings = StrTab->serialize(); + // Emit a list of null-terminated strings. + // Note: the order is important here: the ID used in the remarks corresponds + // to the position of the string in the section. + for (StringRef Str : StrTabStrings) { + OutStreamer->EmitBytes(Str); + // Explicitly emit a '\0'. + OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1); + } + } + + // Emit the null-terminated absolute path to the remark file. + // The path is located at the offset 0x4 in the section. + StringRef FilenameRef = RS->getFilename(); + SmallString<128> Filename = FilenameRef; + sys::fs::make_absolute(Filename); + assert(!Filename.empty() && "The filename can't be empty."); + OutStreamer->EmitBytes(Filename); + // Explicitly emit a '\0'. + OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1); +} + bool AsmPrinter::doFinalization(Module &M) { // Set the MachineFunction to nullptr so that we can catch attempted // accesses to MF specific features at the module level and so that @@ -1394,6 +1436,12 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, V, false); } + // Emit the remarks section contents. + // FIXME: Figure out when is the safest time to emit this section. It should + // not come after debug info. 
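emitRemarksSection above lays the section out as: the remarks magic bytes plus an explicit NUL, a little-endian u64 version, a little-endian u64 string-table size (0 when no table is used), the NUL-terminated string-table entries in ID order, and finally the NUL-terminated absolute path to the remark file. A hedged sketch serializing that layout into a buffer; the magic string and version value below are placeholders, not the library's constants:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

static void writeU64LE(std::vector<uint8_t> &B, uint64_t V) {
  for (unsigned I = 0; I != 8; ++I)
    B.push_back(uint8_t(V >> (8 * I)));
}
static void writeNulTerminated(std::vector<uint8_t> &B, const std::string &S) {
  B.insert(B.end(), S.begin(), S.end());
  B.push_back('\0'); // The emitter writes the '\0' explicitly too.
}

int main() {
  std::vector<uint8_t> Section;
  std::vector<std::string> StrTab{"inline", "loop-vectorize"}; // Illustrative.

  writeNulTerminated(Section, "REMARKS"); // Magic (placeholder value).
  writeU64LE(Section, 0);                 // Version.
  uint64_t StrTabSize = 0;                // Size field excludes itself.
  for (const std::string &S : StrTab)
    StrTabSize += S.size() + 1;
  writeU64LE(Section, StrTabSize);
  for (const std::string &S : StrTab)     // Remark IDs = position in table.
    writeNulTerminated(Section, S);
  writeNulTerminated(Section, "/abs/path/foo.opt.yaml"); // Remark file path.

  std::cout << "section is " << Section.size() << " bytes\n";
}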
+ if (EnableRemarksSection) + emitRemarksSection(M); + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); TLOF.emitModuleMetadata(*OutStreamer, M); @@ -1448,7 +1496,6 @@ bool AsmPrinter::doFinalization(Module &M) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->endModule(); - delete HI.Handler; } Handlers.clear(); DD = nullptr; @@ -1592,6 +1639,24 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer->EmitAddrsigSym(getSymbol(&GV)); } + // Emit symbol partition specifications (ELF only). + if (TM.getTargetTriple().isOSBinFormatELF()) { + unsigned UniqueID = 0; + for (const GlobalValue &GV : M.global_values()) { + if (!GV.hasPartition() || GV.isDeclarationForLinker() || + GV.getVisibility() != GlobalValue::DefaultVisibility) + continue; + + OutStreamer->SwitchSection(OutContext.getELFSection( + ".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0, "", ++UniqueID)); + OutStreamer->EmitBytes(GV.getPartition()); + OutStreamer->EmitZeros(1); + OutStreamer->EmitValue( + MCSymbolRefExpr::create(getSymbol(&GV), OutContext), + MAI->getCodePointerSize()); + } + } + // Allow the target to emit any magic that it wants at the end of the file, // after everything else has gone out. EmitEndOfAsmFile(M); @@ -1628,11 +1693,6 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { } ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); - - const TargetSubtargetInfo &STI = MF.getSubtarget(); - EnablePrintSchedInfo = PrintSchedule.getNumOccurrences() - ? PrintSchedule - : STI.supportPrintSchedInfo(); } namespace { @@ -1905,8 +1965,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each -/// global in the specified llvm.used list for which emitUsedDirectiveFor -/// is true, as being used with this directive. +/// global in the specified llvm.used list. void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { // Should be an array of 'i8*'. for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { @@ -1933,7 +1992,7 @@ struct Structor { /// priority. void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, bool isCtor) { - // Should be an array of '{ int, void ()* }' structs. The first value is the + // Should be an array of '{ i32, void ()*, i8* }' structs. The first value is the // init priority. if (!isa<ConstantArray>(List)) return; @@ -1941,12 +2000,10 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, const ConstantArray *InitList = dyn_cast<ConstantArray>(List); if (!InitList) return; // Not an array! StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType()); - // FIXME: Only allow the 3-field form in LLVM 4.0. - if (!ETy || ETy->getNumElements() < 2 || ETy->getNumElements() > 3) - return; // Not an array of two or three elements! - if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) || - !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr). - if (ETy->getNumElements() == 3 && !isa<PointerType>(ETy->getTypeAtIndex(2U))) + if (!ETy || ETy->getNumElements() != 3 || + !isa<IntegerType>(ETy->getTypeAtIndex(0U)) || + !isa<PointerType>(ETy->getTypeAtIndex(1U)) || + !isa<PointerType>(ETy->getTypeAtIndex(2U))) return; // Not (int, ptr, ptr). // Gather the structors in a form that's convenient for sorting by priority. 
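Each record in the new .llvm_sympart emission above is minimal: the partition name from GV.getPartition(), one NUL byte (EmitZeros(1)), and a code-pointer-sized reference to the partitioned symbol, placed in a uniquely-identified SHT_LLVM_SYMPART section per global. A byte-level sketch of one record, with a raw address standing in for the symbol relocation the real streamer emits:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
  const unsigned PtrSize = 8;              // MAI->getCodePointerSize().
  std::string Partition = "libfeature.so"; // Illustrative partition name.
  uint64_t SymbolAddr = 0x401000;          // Placeholder for the relocation.

  std::vector<uint8_t> Record;
  Record.insert(Record.end(), Partition.begin(), Partition.end());
  Record.push_back('\0');                  // EmitZeros(1).
  for (unsigned I = 0; I != PtrSize; ++I)  // Pointer-sized symbol value.
    Record.push_back(uint8_t(SymbolAddr >> (8 * I)));

  std::cout << "record: " << Record.size() << " bytes ("
            << Partition.size() + 1 << " name + " << PtrSize
            << " pointer)\n";
}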
@@ -1962,16 +2019,16 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, Structor &S = Structors.back(); S.Priority = Priority->getLimitedValue(65535); S.Func = CS->getOperand(1); - if (ETy->getNumElements() == 3 && !CS->getOperand(2)->isNullValue()) + if (!CS->getOperand(2)->isNullValue()) S.ComdatKey = dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts()); } // Emit the function pointers in the target-specific order unsigned Align = Log2_32(DL.getPointerPrefAlignment()); - std::stable_sort(Structors.begin(), Structors.end(), - [](const Structor &L, - const Structor &R) { return L.Priority < R.Priority; }); + llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) { + return L.Priority < R.Priority; + }); for (Structor &S : Structors) { const TargetLoweringObjectFile &Obj = getObjFileLowering(); const MCSymbol *KeySym = nullptr; @@ -2199,7 +2256,10 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // We can emit the pointer value into this slot if the slot is an // integer slot equal to the size of the pointer. - if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType())) + // + // If the pointer is larger than the resultant integer, then + // as with Trunc just depend on the assembler to truncate it. + if (DL.getTypeAllocSize(Ty) <= DL.getTypeAllocSize(Op->getType())) return OpExpr; // Otherwise the pointer is smaller than the resultant integer, mask off @@ -2740,7 +2800,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { - if (getSubtargetInfo().getTargetTriple().isKnownWindowsMSVCEnvironment()) { + if (getSubtargetInfo().getTargetTriple().isWindowsMSVCEnvironment()) { const MachineConstantPoolEntry &CPE = MF->getConstantPool()->getConstants()[CPID]; if (!CPE.isMachineConstantPoolEntry()) { @@ -2858,7 +2918,7 @@ void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB, MCCodePaddingContext &Context) const { assert(MF != nullptr && "Machine function must be valid"); Context.IsPaddingActive = !MF->hasInlineAsm() && - !MF->getFunction().optForSize() && + !MF->getFunction().hasOptSize() && TM.getOptLevel() != CodeGenOpt::None; Context.IsBasicBlockReachableViaFallthrough = std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) != @@ -2918,13 +2978,16 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { // Print the main label for the block. if (MBB.pred_empty() || - (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) { + (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry() && + !MBB.hasLabelMustBeEmitted())) { if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer->emitRawComment(" %bb." 
+ Twine(MBB.getNumber()) + ":", false); } } else { + if (isVerbose() && MBB.hasLabelMustBeEmitted()) + OutStreamer->AddComment("Label of block must be emitted"); OutStreamer->EmitLabel(MBB.getSymbol()); } } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index afce3ad3133b..992e44d95306 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -1,9 +1,8 @@ //===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -19,6 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" @@ -43,11 +43,11 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { OutStreamer->EmitSLEB128IntValue(Value); } -void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const { +void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) OutStreamer->AddComment(Desc); - OutStreamer->EmitULEB128IntValue(Value); + OutStreamer->EmitULEB128IntValue(Value, PadTo); } /// Emit something like ".uleb128 Hi-Lo". @@ -183,6 +183,25 @@ void AsmPrinter::EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const { EmitLabelPlusOffset(Label, Offset, MAI->getCodePointerSize()); } +void AsmPrinter::EmitCallSiteOffset(const MCSymbol *Hi, + const MCSymbol *Lo, + unsigned Encoding) const { + // The least significant 3 bits specify the width of the encoding + if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128) + EmitLabelDifferenceAsULEB128(Hi, Lo); + else + EmitLabelDifference(Hi, Lo, GetSizeOfEncodedValue(Encoding)); +} + +void AsmPrinter::EmitCallSiteValue(uint64_t Value, + unsigned Encoding) const { + // The least significant 3 bits specify the width of the encoding + if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128) + EmitULEB128(Value); + else + OutStreamer->EmitIntValue(Value, GetSizeOfEncodedValue(Encoding)); +} + //===----------------------------------------------------------------------===// // Dwarf Lowering Routines //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 62103e3107c0..7721e996aca5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -1,9 +1,8 @@ //===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
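Both the new PadTo parameter on EmitULEB128 above and the ByteStreamer plumbing later in this diff rely on ULEB128's ability to pad: redundant 0x80 continuation bytes plus a final 0x00 hold a value's encoding at a fixed width, which lets fields such as call-site offsets be emitted before their final values are known. A standalone sketch of that encoder, matching the semantics of llvm::encodeULEB128's PadTo:

#include <cstdint>
#include <cstdio>
#include <vector>

// ULEB128 with optional padding: emit at least PadTo bytes by adding
// redundant continuation bytes (0x80) and closing with 0x00 if needed.
static unsigned encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out,
                              unsigned PadTo = 0) {
  unsigned Count = 0;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0 || Count + 1 < PadTo)
      Byte |= 0x80; // Mark more bytes to come.
    Out.push_back(Byte);
    ++Count;
  } while (Value != 0);
  while (Count < PadTo) { // Pad with 0x80s, then terminate with 0x00.
    Out.push_back(Count + 1 < PadTo ? 0x80 : 0x00);
    ++Count;
  }
  return Count;
}

int main() {
  std::vector<uint8_t> Buf;
  encodeULEB128(624485, Buf); // Classic example: e5 8e 26.
  encodeULEB128(1, Buf, 4);   // Padded to four bytes: 81 80 80 00.
  for (uint8_t B : Buf)
    std::printf("%02x ", B);
  std::printf("\n");
}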
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -19,7 +18,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -155,15 +153,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, " we don't have an asm parser for this target\n"); Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); - Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo); // Enable lexing Masm binary and hex integer literals in intel inline // assembly. if (Dialect == InlineAsm::AD_Intel) Parser->getLexer().setLexMasmIntegers(true); - if (MF) { - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - TAP->SetFrameRegister(TRI->getFrameRegister(*MF)); - } emitInlineAsmStart(); // Don't implicitly switch to the text section before the asm. @@ -176,9 +169,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, } static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, - MachineModuleInfo *MMI, int InlineAsmVariant, - AsmPrinter *AP, unsigned LocCookie, - raw_ostream &OS) { + MachineModuleInfo *MMI, AsmPrinter *AP, + unsigned LocCookie, raw_ostream &OS) { // Switch to the inline assembly variant. OS << "\t.intel_syntax\n\t"; @@ -270,11 +262,9 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, ++OpNo; // Skip over the ID number. if (InlineAsm::isMemKind(OpFlags)) { - Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, - /*Modifier*/ nullptr, OS); + Error = AP->PrintAsmMemoryOperand(MI, OpNo, /*Modifier*/ nullptr, OS); } else { - Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, - /*Modifier*/ nullptr, OS); + Error = AP->PrintAsmOperand(MI, OpNo, /*Modifier*/ nullptr, OS); } } if (Error) { @@ -291,9 +281,9 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, - MachineModuleInfo *MMI, int InlineAsmVariant, - int AsmPrinterVariant, AsmPrinter *AP, - unsigned LocCookie, raw_ostream &OS) { + MachineModuleInfo *MMI, int AsmPrinterVariant, + AsmPrinter *AP, unsigned LocCookie, + raw_ostream &OS) { int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. unsigned NumOperands = MI->getNumOperands(); @@ -435,17 +425,25 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, unsigned OpFlags = MI->getOperand(OpNo).getImm(); ++OpNo; // Skip over the ID number. + // FIXME: Shouldn't arch-independent output template handling go into + // PrintAsmOperand? if (Modifier[0] == 'l') { // Labels are target independent. - // FIXME: What if the operand isn't an MBB, report error? 
- const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); - Sym->print(OS, AP->MAI); + if (MI->getOperand(OpNo).isBlockAddress()) { + const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress(); + MCSymbol *Sym = AP->GetBlockAddressSymbol(BA); + Sym->print(OS, AP->MAI); + } else if (MI->getOperand(OpNo).isMBB()) { + const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); + Sym->print(OS, AP->MAI); + } else { + Error = true; + } } else { if (InlineAsm::isMemKind(OpFlags)) { - Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, - Modifier[0] ? Modifier : nullptr, - OS); + Error = AP->PrintAsmMemoryOperand( + MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); } else { - Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, + Error = AP->PrintAsmOperand(MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); } } @@ -515,18 +513,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // The variant of the current asmprinter. int AsmPrinterVariant = MAI->getAssemblerDialect(); - InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect(); AsmPrinter *AP = const_cast<AsmPrinter*>(this); - if (InlineAsmVariant == InlineAsm::AD_ATT) - EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant, - AP, LocCookie, OS); + if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT) + EmitGCCInlineAsmStr(AsmStr, MI, MMI, AsmPrinterVariant, AP, LocCookie, OS); else - EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS); - - // Reset SanitizeAddress based on the function's attribute. - MCTargetOptions MCOptions = TM.Options.MCOptions; - MCOptions.SanitizeAddress = - MF->getFunction().hasFnAttribute(Attribute::SanitizeAddress); + EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS); // Emit warnings if we use reserved registers on the clobber list, as // that might give surprising results. @@ -566,7 +557,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note); } - EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD, + EmitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD, MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't @@ -608,32 +599,50 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, } } +void AsmPrinter::PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS) { + assert(MO.isGlobal() && "caller should check MO.isGlobal"); + getSymbol(MO.getGlobal())->print(OS, MAI); + printOffset(MO.getOffset(), OS); +} + /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM /// instruction, using the specified assembler variant. Targets should -/// override this to format as appropriate. +/// override this to format as appropriate for machine specific ExtraCodes +/// or when the arch-independent handling would be too complex otherwise. bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O) { + const char *ExtraCode, raw_ostream &O) { // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { if (ExtraCode[1] != 0) return true; // Unknown modifier. + // https://gcc.gnu.org/onlinedocs/gccint/Output-Template.html const MachineOperand &MO = MI->getOperand(OpNo); switch (ExtraCode[0]) { default: return true; // Unknown modifier. + case 'a': // Print as memory address. 
+ if (MO.isReg()) { + PrintAsmMemoryOperand(MI, OpNo, nullptr, O); + return false; + } + LLVM_FALLTHROUGH; // GCC allows '%a' to behave like '%c' with immediates. case 'c': // Substitute immediate value without immediate syntax - if (MO.getType() != MachineOperand::MO_Immediate) - return true; - O << MO.getImm(); - return false; + if (MO.isImm()) { + O << MO.getImm(); + return false; + } + if (MO.isGlobal()) { + PrintSymbolOperand(MO, O); + return false; + } + return true; case 'n': // Negate the immediate constant. - if (MO.getType() != MachineOperand::MO_Immediate) + if (!MO.isImm()) return true; O << -MO.getImm(); return false; case 's': // The GCC deprecated s modifier - if (MO.getType() != MachineOperand::MO_Immediate) + if (!MO.isImm()) return true; O << ((32 - MO.getImm()) & 31); return false; @@ -643,7 +652,6 @@ bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, } bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { // Target doesn't support this yet! return true; diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h index 2163cc7e3e11..db2ff458eb2e 100644 --- a/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/ByteStreamer.h - ByteStreamer class --------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -32,7 +31,7 @@ class ByteStreamer { // For now we're just handling the calls we need for dwarf emission/hashing. 
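The PrintAsmOperand switch above implements the target-independent subset of GCC's output-template modifiers: %c substitutes an immediate (and now a global's symbol) without immediate syntax, %n prints the negated immediate, and %a prints an operand as a memory address. From the user's side this looks like the following; the snippet uses GCC/Clang extended inline asm in x86 AT&T syntax purely for illustration:

#include <cstdio>

int main() {
  int Out;
  // %c2 substitutes the constant 40 with no '$' prefix, so the template can
  // supply its own immediate syntax (or use it in address arithmetic);
  // %n2 would substitute -40 instead.
  asm("movl %1, %0\n\t"
      "addl $%c2, %0"
      : "=r"(Out)
      : "r"(2), "i"(40));
  std::printf("%d\n", Out); // 42
}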
virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0; virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0; - virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "") = 0; + virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "", unsigned PadTo = 0) = 0; }; class APByteStreamer final : public ByteStreamer { @@ -49,7 +48,7 @@ public: AP.OutStreamer->AddComment(Comment); AP.EmitSLEB128(DWord); } - void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override { AP.OutStreamer->AddComment(Comment); AP.EmitULEB128(DWord); } @@ -66,7 +65,7 @@ class HashingByteStreamer final : public ByteStreamer { void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { Hash.addSLEB128(DWord); } - void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override { Hash.addULEB128(DWord); } }; @@ -103,9 +102,9 @@ public: } } - void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override { raw_svector_ostream OSE(Buffer); - unsigned Length = encodeULEB128(DWord, OSE); + unsigned Length = encodeULEB128(DWord, OSE, PadTo); if (GenerateComments) { Comments.push_back(Comment.str()); // Add some empty comments to keep the Buffer and Comments vectors aligned diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 8cabad4ad312..932959c311fa 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1,9 +1,8 @@ //===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -42,6 +41,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h" #include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" #include "llvm/DebugInfo/CodeView/EnumTables.h" @@ -51,6 +51,7 @@ #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeTableCollection.h" +#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -67,6 +68,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -93,6 +95,26 @@ using namespace llvm; using namespace llvm::codeview; +namespace { +class CVMCAdapter : public CodeViewRecordStreamer { +public: + CVMCAdapter(MCStreamer &OS) : OS(&OS) {} + + void EmitBytes(StringRef Data) { OS->EmitBytes(Data); } + + void EmitIntValue(uint64_t Value, unsigned Size) { + OS->EmitIntValueInHex(Value, Size); + } + + void EmitBinaryData(StringRef Data) { OS->EmitBinaryData(Data); } + + void AddComment(const Twine &T) { OS->AddComment(T); } + +private: + MCStreamer *OS = nullptr; +}; +} // namespace + static CPUType mapArchToCVCPUType(Triple::ArchType Type) { switch (Type) { case Triple::ArchType::x86: @@ -273,7 +295,7 @@ static const DISubprogram *getQualifiedNameComponents( StringRef ScopeName = getPrettyScopeName(Scope); if (!ScopeName.empty()) QualifiedNameComponents.push_back(ScopeName); - Scope = Scope->getScope().resolve(); + Scope = Scope->getScope(); } return ClosestSubprogram; } @@ -309,7 +331,7 @@ struct CodeViewDebug::TypeLoweringScope { }; static std::string getFullyQualifiedName(const DIScope *Ty) { - const DIScope *Scope = Ty->getScope().resolve(); + const DIScope *Scope = Ty->getScope(); return getFullyQualifiedName(Scope, getPrettyScopeName(Ty)); } @@ -344,7 +366,7 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { // MSVC. StringRef DisplayName = SP->getName().split('<').first; - const DIScope *Scope = SP->getScope().resolve(); + const DIScope *Scope = SP->getScope(); TypeIndex TI; if (const auto *Class = dyn_cast_or_null<DICompositeType>(Scope)) { // If the scope is a DICompositeType, then this must be a method. 
Member @@ -364,8 +386,8 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { return recordTypeIndexForDINode(SP, TI); } -static bool isTrivial(const DICompositeType *DCTy) { - return ((DCTy->getFlags() & DINode::FlagTrivial) == DINode::FlagTrivial); +static bool isNonTrivial(const DICompositeType *DCTy) { + return ((DCTy->getFlags() & DINode::FlagNonTrivial) == DINode::FlagNonTrivial); } static FunctionOptions @@ -376,16 +398,16 @@ getFunctionOptions(const DISubroutineType *Ty, const DIType *ReturnTy = nullptr; if (auto TypeArray = Ty->getTypeArray()) { if (TypeArray.size()) - ReturnTy = TypeArray[0].resolve(); + ReturnTy = TypeArray[0]; } if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy)) { - if (!isTrivial(ReturnDCTy)) + if (isNonTrivial(ReturnDCTy)) FO |= FunctionOptions::CxxReturnUdt; } // DISubroutineType is unnamed. Use DISubprogram's i.e. SPName in comparison. - if (ClassTy && !isTrivial(ClassTy) && SPName == ClassTy->getName()) { + if (ClassTy && isNonTrivial(ClassTy) && SPName == ClassTy->getName()) { FO |= FunctionOptions::Constructor; // TODO: put the FunctionOptions::ConstructorWithVirtualBases flag. @@ -582,8 +604,9 @@ void CodeViewDebug::endModule() { clear(); } -static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S, - unsigned MaxFixedRecordLength = 0xF00) { +static void +emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S, + unsigned MaxFixedRecordLength = 0xF00) { // The maximum CV record length is 0xFF00. Most of the strings we emit appear // after a fixed length portion of the record. The fixed length portion should // always be less than 0xF00 (3840) bytes, so truncate the string so that the @@ -594,6 +617,13 @@ static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S, OS.EmitBytes(NullTerminatedString); } +static StringRef getTypeLeafName(TypeLeafKind TypeKind) { + for (const EnumEntry<TypeLeafKind> &EE : getTypeLeafNames()) + if (EE.Value == TypeKind) + return EE.Name; + return ""; +} + void CodeViewDebug::emitTypeInformation() { if (TypeTable.empty()) return; @@ -610,31 +640,55 @@ void CodeViewDebug::emitTypeInformation() { } TypeTableCollection Table(TypeTable.records()); + SmallString<512> CommentBlock; + raw_svector_ostream CommentOS(CommentBlock); + std::unique_ptr<ScopedPrinter> SP; + std::unique_ptr<TypeDumpVisitor> TDV; + TypeVisitorCallbackPipeline Pipeline; + + if (OS.isVerboseAsm()) { + // To construct block comment describing the type record for readability. + SP = llvm::make_unique<ScopedPrinter>(CommentOS); + SP->setPrefix(CommentPrefix); + TDV = llvm::make_unique<TypeDumpVisitor>(Table, SP.get(), false); + Pipeline.addCallbackToPipeline(*TDV); + } + + // To emit type record using Codeview MCStreamer adapter + CVMCAdapter CVMCOS(OS); + TypeRecordMapping typeMapping(CVMCOS); + Pipeline.addCallbackToPipeline(typeMapping); + Optional<TypeIndex> B = Table.getFirst(); while (B) { // This will fail if the record data is invalid. 
CVType Record = Table.getType(*B); + CommentBlock.clear(); + + auto RecordLen = Record.length(); + auto RecordKind = Record.kind(); + if (OS.isVerboseAsm()) + CVMCOS.AddComment("Record length"); + CVMCOS.EmitIntValue(RecordLen - 2, 2); + if (OS.isVerboseAsm()) + CVMCOS.AddComment("Record kind: " + getTypeLeafName(RecordKind)); + CVMCOS.EmitIntValue(RecordKind, sizeof(RecordKind)); + + Error E = codeview::visitTypeRecord(Record, *B, Pipeline); + + if (E) { + logAllUnhandledErrors(std::move(E), errs(), "error: "); + llvm_unreachable("produced malformed type record"); + } + if (OS.isVerboseAsm()) { - // Emit a block comment describing the type record for readability. - SmallString<512> CommentBlock; - raw_svector_ostream CommentOS(CommentBlock); - ScopedPrinter SP(CommentOS); - SP.setPrefix(CommentPrefix); - TypeDumpVisitor TDV(Table, &SP, false); - - Error E = codeview::visitTypeRecord(Record, *B, TDV); - if (E) { - logAllUnhandledErrors(std::move(E), errs(), "error: "); - llvm_unreachable("produced malformed type record"); - } // emitRawComment will insert its own tab and comment string before // the first line, so strip off our first one. It also prints its own // newline. OS.emitRawComment( CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim()); } - OS.EmitBinaryData(Record.str_data()); B = Table.getNext(*B); } } @@ -700,6 +754,8 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { return SourceLanguage::Java; case dwarf::DW_LANG_D: return SourceLanguage::D; + case dwarf::DW_LANG_Swift: + return SourceLanguage::Swift; default: // There's no CodeView representation for this language, and CV doesn't // have an "unknown" option for the language field, so we'll use MASM, @@ -973,8 +1029,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // If we have a display name, build the fully qualified name by walking the // chain of scopes. if (!SP->getName().empty()) - FuncName = - getFullyQualifiedName(SP->getScope().resolve(), SP->getName()); + FuncName = getFullyQualifiedName(SP->getScope(), SP->getName()); // If our DISubprogram name is empty, use the mangled name. if (FuncName.empty()) @@ -1071,6 +1126,28 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, endSymbolRecord(AnnotEnd); } + for (auto HeapAllocSite : FI.HeapAllocSites) { + MCSymbol *BeginLabel = std::get<0>(HeapAllocSite); + MCSymbol *EndLabel = std::get<1>(HeapAllocSite); + + // The labels might not be defined if the instruction was replaced + // somewhere in the codegen pipeline. + if (!BeginLabel->isDefined() || !EndLabel->isDefined()) + continue; + + DIType *DITy = std::get<2>(HeapAllocSite); + MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE); + OS.AddComment("Call site offset"); + OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0); + OS.AddComment("Call site section index"); + OS.EmitCOFFSectionIndex(BeginLabel); + OS.AddComment("Call instruction length"); + OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2); + OS.AddComment("Type index"); + OS.EmitIntValue(getCompleteTypeIndex(DITy).getIndex(), 4); + endSymbolRecord(HeapAllocEnd); + } + if (SP != nullptr) emitDebugInfoForUDTs(LocalUDTs); @@ -1118,9 +1195,15 @@ void CodeViewDebug::collectVariableInfoFromMFTable( // If the variable has an attached offset expression, extract it. // FIXME: Try to handle DW_OP_deref as well. 
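With the CVMCAdapter in place, emitTypeInformation above writes each type record's framing itself: a 2-byte length that excludes the length field but includes the kind (hence RecordLen - 2), then the 2-byte leaf kind, with the body produced by TypeRecordMapping through the visitor pipeline. A sketch of that framing rule over raw bytes:

#include <cstdint>
#include <cstdio>
#include <vector>

// Frame one CodeView type record: u16 length (total size minus the two
// length bytes), u16 leaf kind, then the serialized body.
static void frameRecord(std::vector<uint8_t> &Out, uint16_t Kind,
                        const std::vector<uint8_t> &Body) {
  uint16_t Len = uint16_t(Body.size() + 2); // Kind is counted, length isn't.
  Out.push_back(uint8_t(Len));
  Out.push_back(uint8_t(Len >> 8));
  Out.push_back(uint8_t(Kind));
  Out.push_back(uint8_t(Kind >> 8));
  Out.insert(Out.end(), Body.begin(), Body.end());
}

int main() {
  std::vector<uint8_t> Section;
  std::vector<uint8_t> Body{0x74, 0x00, 0x00, 0x00}; // Placeholder payload.
  frameRecord(Section, 0x1008, Body); // 0x1008 = LF_PROCEDURE, for example.
  // Record.length() in the real code is the full size (8 here); the field
  // actually emitted is length() - 2 == 6.
  std::printf("emitted %zu bytes, length field = %u\n", Section.size(),
              unsigned(Body.size() + 2));
}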
int64_t ExprOffset = 0; - if (VI.Expr) - if (!VI.Expr->extractIfOffset(ExprOffset)) + bool Deref = false; + if (VI.Expr) { + // If there is one DW_OP_deref element, use offset of 0 and keep going. + if (VI.Expr->getNumElements() == 1 && + VI.Expr->getElement(0) == llvm::dwarf::DW_OP_deref) + Deref = true; + else if (!VI.Expr->extractIfOffset(ExprOffset)) continue; + } // Get the frame register used and the offset. unsigned FrameReg = 0; @@ -1130,6 +1213,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable( // Calculate the label ranges. LocalVarDefRange DefRange = createDefRangeMem(CVReg, FrameOffset + ExprOffset); + for (const InsnRange &Range : Scope->getRanges()) { const MCSymbol *Begin = getLabelBeforeInsn(Range.first); const MCSymbol *End = getLabelAfterInsn(Range.second); @@ -1140,6 +1224,9 @@ void CodeViewDebug::collectVariableInfoFromMFTable( LocalVariable Var; Var.DIVar = VI.Var; Var.DefRanges.emplace_back(std::move(DefRange)); + if (Deref) + Var.UseReferenceType = true; + recordLocalVariable(std::move(Var), Scope); } } @@ -1153,13 +1240,15 @@ static bool needsReferenceType(const DbgVariableLocation &Loc) { } void CodeViewDebug::calculateRanges( - LocalVariable &Var, const DbgValueHistoryMap::InstrRanges &Ranges) { + LocalVariable &Var, const DbgValueHistoryMap::Entries &Entries) { const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo(); // Calculate the definition ranges. - for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { - const InsnRange &Range = *I; - const MachineInstr *DVInst = Range.first; + for (auto I = Entries.begin(), E = Entries.end(); I != E; ++I) { + const auto &Entry = *I; + if (!Entry.isDbgValue()) + continue; + const MachineInstr *DVInst = Entry.getInstr(); assert(DVInst->isDebugValue() && "Invalid History entry"); // FIXME: Find a way to represent constant variables, since they are // relatively common. @@ -1186,7 +1275,7 @@ void CodeViewDebug::calculateRanges( // Start over using that. Var.UseReferenceType = true; Var.DefRanges.clear(); - calculateRanges(Var, Ranges); + calculateRanges(Var, Entries); return; } @@ -1214,21 +1303,15 @@ void CodeViewDebug::calculateRanges( } // Compute the label range. - const MCSymbol *Begin = getLabelBeforeInsn(Range.first); - const MCSymbol *End = getLabelAfterInsn(Range.second); - if (!End) { - // This range is valid until the next overlapping bitpiece. In the - // common case, ranges will not be bitpieces, so they will overlap. - auto J = std::next(I); - const DIExpression *DIExpr = DVInst->getDebugExpression(); - while (J != E && - !DIExpr->fragmentsOverlap(J->first->getDebugExpression())) - ++J; - if (J != E) - End = getLabelBeforeInsn(J->first); - else - End = Asm->getFunctionEnd(); - } + const MCSymbol *Begin = getLabelBeforeInsn(Entry.getInstr()); + const MCSymbol *End; + if (Entry.getEndIndex() != DbgValueHistoryMap::NoEntry) { + auto &EndingEntry = Entries[Entry.getEndIndex()]; + End = EndingEntry.isDbgValue() + ? getLabelBeforeInsn(EndingEntry.getInstr()) + : getLabelAfterInsn(EndingEntry.getInstr()); + } else + End = Asm->getFunctionEnd(); // If the last range end is our begin, just extend the last range. // Otherwise make a new range. @@ -1256,7 +1339,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { const DILocation *InlinedAt = IV.second; // Instruction ranges, specifying where IV is accessible. 
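The end label chosen above encodes an asymmetry worth spelling out: an entry closed by a newer DBG_VALUE must end at the label before that instruction, since the new location takes over there, while an entry closed by a clobber ends at the label after it, since the old location stays valid until the clobber executes. A toy sketch of the same decision, with strings standing in for MCSymbol labels (all names here are hypothetical):

#include <cstddef>
#include <limits>
#include <string>
#include <vector>

constexpr std::size_t NoEntry = std::numeric_limits<std::size_t>::max();

struct Entry {
  std::string BeforeLabel, AfterLabel; // labels around the instruction
  bool IsDbgValue = true;              // false means a clobber entry
  std::size_t EndIndex = NoEntry;      // index of the entry closing this one
};

std::string rangeEndLabel(const std::vector<Entry> &Entries, const Entry &E,
                          const std::string &FunctionEnd) {
  if (E.EndIndex == NoEntry)
    return FunctionEnd; // open entry: live to the end of the function
  const Entry &Closer = Entries[E.EndIndex];
  // A superseding DBG_VALUE takes over *before* itself; a clobber kills
  // the location only *after* it has executed.
  return Closer.IsDbgValue ? Closer.BeforeLabel : Closer.AfterLabel;
}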
- const auto &Ranges = I.second; + const auto &Entries = I.second; LexicalScope *Scope = nullptr; if (InlinedAt) @@ -1270,7 +1353,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { LocalVariable Var; Var.DIVar = DIVar; - calculateRanges(Var, Ranges); + calculateRanges(Var, Entries); recordLocalVariable(std::move(Var), Scope); } } @@ -1340,8 +1423,8 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { FPO |= FrameProcedureOptions::SecurityChecks; FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U); FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U); - if (Asm->TM.getOptLevel() != CodeGenOpt::None && !GV.optForSize() && - !GV.hasFnAttribute(Attribute::OptimizeNone)) + if (Asm->TM.getOptLevel() != CodeGenOpt::None && + !GV.hasOptSize() && !GV.hasOptNone()) FPO |= FrameProcedureOptions::OptimizedForSpeed; // FIXME: Set GuardCfg when it is implemented. CurFn->FrameProcOpts = FPO; @@ -1379,7 +1462,7 @@ static bool shouldEmitUdt(const DIType *T) { // MSVC does not emit UDTs for typedefs that are scoped to classes. if (T->getTag() == dwarf::DW_TAG_typedef) { - if (DIScope *Scope = T->getScope().resolve()) { + if (DIScope *Scope = T->getScope()) { switch (Scope->getTag()) { case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_class_type: @@ -1396,7 +1479,7 @@ static bool shouldEmitUdt(const DIType *T) { const DIDerivedType *DT = dyn_cast<DIDerivedType>(T); if (!DT) return true; - T = DT->getBaseType().resolve(); + T = DT->getBaseType(); } return true; } @@ -1409,8 +1492,8 @@ void CodeViewDebug::addToUDTs(const DIType *Ty) { return; SmallVector<StringRef, 5> QualifiedNameComponents; - const DISubprogram *ClosestSubprogram = getQualifiedNameComponents( - Ty->getScope().resolve(), QualifiedNameComponents); + const DISubprogram *ClosestSubprogram = + getQualifiedNameComponents(Ty->getScope(), QualifiedNameComponents); std::string FullyQualifiedName = getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty)); @@ -1479,8 +1562,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { } TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) { - DITypeRef UnderlyingTypeRef = Ty->getBaseType(); - TypeIndex UnderlyingTypeIndex = getTypeIndex(UnderlyingTypeRef); + TypeIndex UnderlyingTypeIndex = getTypeIndex(Ty->getBaseType()); StringRef TypeName = Ty->getName(); addToUDTs(Ty); @@ -1496,14 +1578,14 @@ TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) { } TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { - DITypeRef ElementTypeRef = Ty->getBaseType(); - TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef); + const DIType *ElementType = Ty->getBaseType(); + TypeIndex ElementTypeIndex = getTypeIndex(ElementType); // IndexType is size_t, which depends on the bitness of the target. TypeIndex IndexType = getPointerSizeInBytes() == 8 ? TypeIndex(SimpleTypeKind::UInt64Quad) : TypeIndex(SimpleTypeKind::UInt32Long); - uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8; + uint64_t ElementSize = getBaseTypeSize(ElementType) / 8; // Add subranges to array type. DINodeArray Elements = Ty->getElements(); @@ -1764,7 +1846,7 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) { break; } if (IsModifier) - BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve(); + BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType(); } // Check if the inner type will use an LF_POINTER record. 
If so, the @@ -1797,8 +1879,8 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) { TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices; - for (DITypeRef ArgTypeRef : Ty->getTypeArray()) - ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef)); + for (const DIType *ArgType : Ty->getTypeArray()) + ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgType)); // MSVC uses type none for variadic argument. if (ReturnAndArgTypeIndices.size() > 1 && @@ -1836,7 +1918,10 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, unsigned Index = 0; SmallVector<TypeIndex, 8> ArgTypeIndices; - TypeIndex ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]); + TypeIndex ReturnTypeIndex = TypeIndex::Void(); + if (ReturnAndArgs.size() > Index) { + ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]); + } // If the first argument is a pointer type and this isn't a static method, // treat it as the special 'this' parameter, which is encoded separately from @@ -1844,7 +1929,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, TypeIndex ThisTypeIndex; if (!IsStaticMethod && ReturnAndArgs.size() > Index) { if (const DIDerivedType *PtrTy = - dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index].resolve())) { + dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index])) { if (PtrTy->getTag() == dwarf::DW_TAG_pointer_type) { ThisTypeIndex = getTypeIndexForThisPtr(PtrTy, Ty); Index++; @@ -1942,7 +2027,7 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) { // Put the Nested flag on a type if it appears immediately inside a tag type. // Do not walk the scope chain. Do not attempt to compute ContainsNestedClass // here. That flag is only set on definitions, and not forward declarations. - const DIScope *ImmediateScope = Ty->getScope().resolve(); + const DIScope *ImmediateScope = Ty->getScope(); if (ImmediateScope && isa<DICompositeType>(ImmediateScope)) CO |= ClassOptions::Nested; @@ -1955,7 +2040,7 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) { CO |= ClassOptions::Scoped; } else { for (const DIScope *Scope = ImmediateScope; Scope != nullptr; - Scope = Scope->getScope().resolve()) { + Scope = Scope->getScope()) { if (isa<DISubprogram>(Scope)) { CO |= ClassOptions::Scoped; break; @@ -2075,7 +2160,7 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info, // succeeds, and drop the member if that fails. assert((DDTy->getOffsetInBits() % 8) == 0 && "Unnamed bitfield member!"); uint64_t Offset = DDTy->getOffsetInBits(); - const DIType *Ty = DDTy->getBaseType().resolve(); + const DIType *Ty = DDTy->getBaseType(); bool FullyResolved = false; while (!FullyResolved) { switch (Ty->getTag()) { @@ -2083,7 +2168,7 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info, case dwarf::DW_TAG_volatile_type: // FIXME: we should apply the qualifier types to the indirect fields // rather than dropping them. - Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve(); + Ty = cast<DIDerivedType>(Ty)->getBaseType(); break; default: FullyResolved = true; @@ -2184,6 +2269,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) { if (ContainsNestedClass) CO |= ClassOptions::ContainsNestedClass; + // MSVC appears to set this flag by searching any destructor or method with + // FunctionOptions::Constructor among the emitted members. 
Clang AST has all + // the members, however special member functions are not yet emitted into + // debug information. For now checking a class's non-triviality seems enough. + // FIXME: not true for a nested unnamed struct. + if (isNonTrivial(Ty)) + CO |= ClassOptions::HasConstructorOrDestructor; + std::string FullName = getFullyQualifiedName(Ty); uint64_t SizeInBytes = Ty->getSizeInBits() / 8; @@ -2358,7 +2451,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { // Create nested classes. for (const DIType *Nested : Info.NestedTypes) { - NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName()); + NestedTypeRecord R(getTypeIndex(Nested), Nested->getName()); ContinuationBuilder.writeMemberType(R); MemberCount++; } @@ -2385,10 +2478,7 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() { return VBPType; } -TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) { - const DIType *Ty = TypeRef.resolve(); - const DIType *ClassTy = ClassTyRef.resolve(); - +TypeIndex CodeViewDebug::getTypeIndex(const DIType *Ty, const DIType *ClassTy) { // The null DIType is the void type. Don't try to hash it. if (!Ty) return TypeIndex::Void(); @@ -2431,8 +2521,7 @@ CodeViewDebug::getTypeIndexForThisPtr(const DIDerivedType *PtrTy, return recordTypeIndexForDINode(PtrTy, TI, SubroutineTy); } -TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) { - DIType *Ty = TypeRef.resolve(); +TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(const DIType *Ty) { PointerRecord PR(getTypeIndex(Ty), getPointerSizeInBytes() == 8 ? PointerKind::Near64 : PointerKind::Near32, @@ -2441,9 +2530,7 @@ TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) { return TypeTable.writeLeafType(PR); } -TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) { - const DIType *Ty = TypeRef.resolve(); - +TypeIndex CodeViewDebug::getCompleteTypeIndex(const DIType *Ty) { // The null DIType is the void type. Don't try to hash it. if (!Ty) return TypeIndex::Void(); @@ -2454,7 +2541,7 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) { if (Ty->getTag() == dwarf::DW_TAG_typedef) (void)getTypeIndex(Ty); while (Ty->getTag() == dwarf::DW_TAG_typedef) - Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve(); + Ty = cast<DIDerivedType>(Ty)->getBaseType(); // If this is a non-record type, the complete type index is the same as the // normal type index. Just call getTypeIndex. @@ -2467,11 +2554,7 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) { return getTypeIndex(Ty); } - // Check if we've already translated the complete record type. const auto *CTy = cast<DICompositeType>(Ty); - auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()}); - if (!InsertResult.second) - return InsertResult.first->second; TypeLoweringScope S(*this); @@ -2489,6 +2572,13 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) { return FwdDeclTI; } + // Check if we've already translated the complete record type. + // Insert the type with a null TypeIndex to signify that the type is currently + // being lowered. 
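The guard described in the comment above, and implemented immediately below, is a standard memoization trick for cyclic structures: insert a sentinel before recursing, so that a record type that (transitively) refers back to itself finds the sentinel and terminates instead of looping. A reduced sketch under toy types (none of these names are the real CodeViewDebug members):

#include <map>

struct RecordType { const RecordType *Member = nullptr; };

static std::map<const RecordType *, unsigned> Indices; // 0 = in progress
static unsigned NextIndex = 1;

unsigned lowerComplete(const RecordType *T) {
  auto Ins = Indices.insert({T, 0u}); // sentinel goes in *before* recursing
  if (!Ins.second)
    return Ins.first->second; // already lowered, or currently being lowered
  if (T->Member)
    lowerComplete(T->Member); // may cycle back to T and hit the sentinel
  return Indices[T] = NextIndex++;
}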
+ auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()}); + if (!InsertResult.second) + return InsertResult.first->second; + TypeIndex TI; switch (CTy->getTag()) { case dwarf::DW_TAG_class_type: @@ -2799,6 +2889,7 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { } CurFn->Annotations = MF->getCodeViewAnnotations(); + CurFn->HeapAllocSites = MF->getCodeViewHeapAllocSites(); CurFn->End = Asm->getFunctionEnd(); @@ -2914,10 +3005,19 @@ void CodeViewDebug::collectGlobalVariableInfo() { for (const MDNode *Node : CUs->operands()) { const auto *CU = cast<DICompileUnit>(Node); for (const auto *GVE : CU->getGlobalVariables()) { + const DIGlobalVariable *DIGV = GVE->getVariable(); + const DIExpression *DIE = GVE->getExpression(); + + // Emit constant global variables in a global symbol section. + if (GlobalMap.count(GVE) == 0 && DIE->isConstant()) { + CVGlobalVariable CVGV = {DIGV, DIE}; + GlobalVariables.emplace_back(std::move(CVGV)); + } + const auto *GV = GlobalMap.lookup(GVE); if (!GV || GV->isDeclarationForLinker()) continue; - const DIGlobalVariable *DIGV = GVE->getVariable(); + DIScope *Scope = DIGV->getScope(); SmallVector<CVGlobalVariable, 1> *VariableList; if (Scope && isa<DILocalScope>(Scope)) { @@ -2932,7 +3032,7 @@ void CodeViewDebug::collectGlobalVariableInfo() { // Emit this global variable into a COMDAT section. VariableList = &ComdatVariables; else - // Emit this globla variable in a single global symbol section. + // Emit this global variable in a single global symbol section. VariableList = &GlobalVariables; CVGlobalVariable CVGV = {DIGV, GV}; VariableList->emplace_back(std::move(CVGV)); @@ -2955,13 +3055,14 @@ void CodeViewDebug::emitDebugInfoForGlobals() { // Second, emit each global that is in a comdat into its own .debug$S // section along with its own symbol substream. for (const CVGlobalVariable &CVGV : ComdatVariables) { - MCSymbol *GVSym = Asm->getSymbol(CVGV.GV); + const GlobalVariable *GV = CVGV.GVInfo.get<const GlobalVariable *>(); + MCSymbol *GVSym = Asm->getSymbol(GV); OS.AddComment("Symbol subsection for " + - Twine(GlobalValue::dropLLVMManglingEscape(CVGV.GV->getName()))); + Twine(GlobalValue::dropLLVMManglingEscape(GV->getName()))); switchToDebugSectionForSymbol(GVSym); MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols); // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. - emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym); + emitDebugInfoForGlobal(CVGV); endCVSubsection(EndLabel); } } @@ -2981,31 +3082,63 @@ void CodeViewDebug::emitDebugInfoForRetainedTypes() { // Emit each global variable in the specified array. void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) { for (const CVGlobalVariable &CVGV : Globals) { - MCSymbol *GVSym = Asm->getSymbol(CVGV.GV); // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. - emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym); - } -} - -void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, - const GlobalVariable *GV, - MCSymbol *GVSym) { - // DataSym record, see SymbolRecord.h for more info. Thread local data - // happens to have the same format as global data. - SymbolKind DataSym = GV->isThreadLocal() - ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32 - : SymbolKind::S_GTHREAD32) - : (DIGV->isLocalToUnit() ? 
SymbolKind::S_LDATA32 - : SymbolKind::S_GDATA32); - MCSymbol *DataEnd = beginSymbolRecord(DataSym); - OS.AddComment("Type"); - OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4); - OS.AddComment("DataOffset"); - OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0); - OS.AddComment("Segment"); - OS.EmitCOFFSectionIndex(GVSym); - OS.AddComment("Name"); - const unsigned LengthOfDataRecord = 12; - emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord); - endSymbolRecord(DataEnd); + emitDebugInfoForGlobal(CVGV); + } +} + +void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { + const DIGlobalVariable *DIGV = CVGV.DIGV; + if (const GlobalVariable *GV = + CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) { + // DataSym record, see SymbolRecord.h for more info. Thread local data + // happens to have the same format as global data. + MCSymbol *GVSym = Asm->getSymbol(GV); + SymbolKind DataSym = GV->isThreadLocal() + ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32 + : SymbolKind::S_GTHREAD32) + : (DIGV->isLocalToUnit() ? SymbolKind::S_LDATA32 + : SymbolKind::S_GDATA32); + MCSymbol *DataEnd = beginSymbolRecord(DataSym); + OS.AddComment("Type"); + OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4); + OS.AddComment("DataOffset"); + OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0); + OS.AddComment("Segment"); + OS.EmitCOFFSectionIndex(GVSym); + OS.AddComment("Name"); + const unsigned LengthOfDataRecord = 12; + emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord); + endSymbolRecord(DataEnd); + } else { + // FIXME: Currently this only emits the global variables in the IR metadata. + // This should also emit enums and static data members. + const DIExpression *DIE = CVGV.GVInfo.get<const DIExpression *>(); + assert(DIE->isConstant() && + "Global constant variables must contain a constant expression."); + uint64_t Val = DIE->getElement(1); + + MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT); + OS.AddComment("Type"); + OS.EmitIntValue(getTypeIndex(DIGV->getType()).getIndex(), 4); + OS.AddComment("Value"); + + // Encoded integers shouldn't need more than 10 bytes. + uint8_t data[10]; + BinaryStreamWriter Writer(data, llvm::support::endianness::little); + CodeViewRecordIO IO(Writer); + cantFail(IO.mapEncodedInteger(Val)); + StringRef SRef((char *)data, Writer.getOffset()); + OS.EmitBinaryData(SRef); + + OS.AddComment("Name"); + const DIScope *Scope = DIGV->getScope(); + // For static data members, get the scope from the declaration. + if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>( + DIGV->getRawStaticDataMemberDeclaration())) + Scope = MemberDecl->getScope(); + emitNullTerminatedSymbolName(OS, + getFullyQualifiedName(Scope, DIGV->getName())); + endSymbolRecord(SConstantEnd); + } } diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 21557ed1be35..ce57b789d7fa 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -1,9 +1,8 @@ //===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h --------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/DbgEntityHistoryCalculator.h" @@ -101,7 +101,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { struct CVGlobalVariable { const DIGlobalVariable *DIGV; - const GlobalVariable *GV; + PointerUnion<const GlobalVariable *, const DIExpression *> GVInfo; }; struct InlineSite { @@ -148,6 +148,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { SmallVector<LexicalBlock *, 1> ChildBlocks; std::vector<std::pair<MCSymbol *, MDNode *>> Annotations; + std::vector<std::tuple<MCSymbol *, MCSymbol *, DIType *>> HeapAllocSites; const MCSymbol *Begin = nullptr; const MCSymbol *End = nullptr; @@ -223,7 +224,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex getFuncIdForSubprogram(const DISubprogram *SP); void calculateRanges(LocalVariable &Var, - const DbgValueHistoryMap::InstrRanges &Ranges); + const DbgValueHistoryMap::Entries &Entries); static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children, const FunctionInfo &FI, @@ -313,8 +314,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitDebugInfoForGlobals(); void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals); - void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, - const GlobalVariable *GV, MCSymbol *GVSym); + void emitDebugInfoForGlobal(const CVGlobalVariable &CVGV); /// Opens a subsection of the given kind in a .debug$S codeview section. /// Returns an end label for use with endCVSubsection when the subsection is @@ -373,14 +373,14 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// Translates the DIType to codeview if necessary and returns a type index /// for it. - codeview::TypeIndex getTypeIndex(DITypeRef TypeRef, - DITypeRef ClassTyRef = DITypeRef()); + codeview::TypeIndex getTypeIndex(const DIType *Ty, + const DIType *ClassTy = nullptr); codeview::TypeIndex getTypeIndexForThisPtr(const DIDerivedType *PtrTy, const DISubroutineType *SubroutineTy); - codeview::TypeIndex getTypeIndexForReferenceTo(DITypeRef TypeRef); + codeview::TypeIndex getTypeIndexForReferenceTo(const DIType *Ty); codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP, const DICompositeType *Class); @@ -419,7 +419,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// use this entry point when generating symbol records. The complete and /// incomplete type indices only differ for record types. All other types use /// the same index. 
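CVGlobalVariable::GVInfo above packs its two alternatives into a single llvm::PointerUnion, a tagged union held in one pointer with the discriminator in the low bits. A sketch of the access pattern, assuming it is compiled against LLVM's ADT headers; the two Fake types are stand-ins for the real GlobalVariable and DIExpression classes:

#include "llvm/ADT/PointerUnion.h"

struct FakeGlobal { void *Handle; }; // aligned, so low tag bits are free
struct FakeExpr { void *Handle; };

void emitFor(llvm::PointerUnion<const FakeGlobal *, const FakeExpr *> Info) {
  if (const auto *GV = Info.dyn_cast<const FakeGlobal *>()) {
    (void)GV; // symbol-backed case: would emit an S_GDATA32-style record
  } else {
    // Must hold the other alternative; get<>() asserts on a mismatch.
    const auto *E = Info.get<const FakeExpr *>();
    (void)E;  // constant case: would emit an S_CONSTANT-style record
  }
}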
- codeview::TypeIndex getCompleteTypeIndex(DITypeRef TypeRef); + codeview::TypeIndex getCompleteTypeIndex(const DIType *Ty); codeview::TypeIndex lowerCompleteTypeClass(const DICompositeType *Ty); codeview::TypeIndex lowerCompleteTypeUnion(const DICompositeType *Ty); diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index e27659494f08..f4134da48caa 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -1,9 +1,8 @@ //===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -212,7 +211,7 @@ const DIE *DIE::getUnitDie() const { return nullptr; } -const DIEUnit *DIE::getUnit() const { +DIEUnit *DIE::getUnit() const { const DIE *UnitDie = getUnitDie(); if (UnitDie) return UnitDie->Owner.dyn_cast<DIEUnit*>(); @@ -507,6 +506,23 @@ LLVM_DUMP_METHOD void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); } //===----------------------------------------------------------------------===// +// DIEBaseTypeRef Implementation +//===----------------------------------------------------------------------===// + +void DIEBaseTypeRef::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + uint64_t Offset = CU->ExprRefedBaseTypes[Index].Die->getOffset(); + assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); + AP->EmitULEB128(Offset, nullptr, ULEB128PadSize); +} + +unsigned DIEBaseTypeRef::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + return ULEB128PadSize; +} + +LLVM_DUMP_METHOD +void DIEBaseTypeRef::print(raw_ostream &O) const { O << "BaseTypeRef: " << Index; } + +//===----------------------------------------------------------------------===// // DIEDelta Implementation //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp index b8f1202494d7..bfac8850a2a6 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -226,7 +225,7 @@ void DIEHash::hashLocList(const DIELocList &LocList) { DwarfDebug &DD = *AP->getDwarfDebug(); const DebugLocStream &Locs = DD.getDebugLocs(); for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue()))) - DD.emitDebugLocEntry(Streamer, Entry); + DD.emitDebugLocEntry(Streamer, Entry, nullptr); } // Hash an individual attribute \param Attr based on the type of attribute and @@ -310,6 +309,7 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) { // FIXME: It's uncertain whether or not we should handle this at the moment. case DIEValue::isExpr: case DIEValue::isLabel: + case DIEValue::isBaseTypeRef: case DIEValue::isDelta: llvm_unreachable("Add support for additional value types."); } diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h index dae517ab2c29..2e49514c98be 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/lib/CodeGen/AsmPrinter/DIEHash.h @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index 09867822c30a..ddd60575b6c0 100644 --- a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -1,15 +1,15 @@ //===- llvm/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/DbgEntityHistoryCalculator.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -31,51 +31,62 @@ using namespace llvm; #define DEBUG_TYPE "dwarfdebug" +namespace { +using EntryIndex = DbgValueHistoryMap::EntryIndex; +} + // If @MI is a DBG_VALUE with debug value described by a // defined register, returns the number of this register. // In the other case, returns 0. -static unsigned isDescribedByReg(const MachineInstr &MI) { +static Register isDescribedByReg(const MachineInstr &MI) { assert(MI.isDebugValue()); assert(MI.getNumOperands() == 4); + // If the location of variable is an entry value (DW_OP_entry_value) + // do not consider it as a register location. + if (MI.getDebugExpression()->isEntryValue()) + return 0; // If location of variable is described using a register (directly or // indirectly), this register is always a first operand. - return MI.getOperand(0).isReg() ? 
MI.getOperand(0).getReg() : 0; + return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register(); } -void DbgValueHistoryMap::startInstrRange(InlinedEntity Var, - const MachineInstr &MI) { +bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var, + const MachineInstr &MI, + EntryIndex &NewIndex) { // Instruction range should start with a DBG_VALUE instruction for the // variable. assert(MI.isDebugValue() && "not a DBG_VALUE"); - auto &Ranges = VarInstrRanges[Var]; - if (!Ranges.empty() && Ranges.back().second == nullptr && - Ranges.back().first->isIdenticalTo(MI)) { + auto &Entries = VarEntries[Var]; + if (!Entries.empty() && Entries.back().isDbgValue() && + !Entries.back().isClosed() && + Entries.back().getInstr()->isIdenticalTo(MI)) { LLVM_DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" - << "\t" << Ranges.back().first << "\t" << MI << "\n"); - return; + << "\t" << Entries.back().getInstr() << "\t" << MI + << "\n"); + return false; } - Ranges.push_back(std::make_pair(&MI, nullptr)); + Entries.emplace_back(&MI, Entry::DbgValue); + NewIndex = Entries.size() - 1; + return true; } -void DbgValueHistoryMap::endInstrRange(InlinedEntity Var, - const MachineInstr &MI) { - auto &Ranges = VarInstrRanges[Var]; - // Verify that the current instruction range is not yet closed. - assert(!Ranges.empty() && Ranges.back().second == nullptr); - // For now, instruction ranges are not allowed to cross basic block - // boundaries. - assert(Ranges.back().first->getParent() == MI.getParent()); - Ranges.back().second = &MI; +EntryIndex DbgValueHistoryMap::startClobber(InlinedEntity Var, + const MachineInstr &MI) { + auto &Entries = VarEntries[Var]; + // If an instruction clobbers multiple registers that the variable is + // described by, then we may have already created a clobbering instruction. + if (Entries.back().isClobber() && Entries.back().getInstr() == &MI) + return Entries.size() - 1; + Entries.emplace_back(&MI, Entry::Clobber); + return Entries.size() - 1; } -unsigned DbgValueHistoryMap::getRegisterForVar(InlinedEntity Var) const { - const auto &I = VarInstrRanges.find(Var); - if (I == VarInstrRanges.end()) - return 0; - const auto &Ranges = I->second; - if (Ranges.empty() || Ranges.back().second != nullptr) - return 0; - return isDescribedByReg(*Ranges.back().first); +void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) { + // For now, instruction ranges are not allowed to cross basic block + // boundaries. + assert(isDbgValue() && "Setting end index for non-debug value"); + assert(!isClosed() && "End index has already been set"); + EndIndex = Index; } void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) { @@ -89,6 +100,12 @@ namespace { using InlinedEntity = DbgValueHistoryMap::InlinedEntity; using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedEntity, 1>>; +// Keeps track of the debug value entries that are currently live for each +// inlined entity. As the history map entries are stored in a SmallVector, they +// may be moved at insertion of new entries, so store indices rather than +// pointers. +using DbgValueEntriesMap = std::map<InlinedEntity, SmallSet<EntryIndex, 1>>; + } // end anonymous namespace // Claim that @Var is not described by @RegNo anymore. @@ -114,16 +131,88 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, VarSet.push_back(Var); } +/// Create a clobbering entry and end all open debug value entries +/// for \p Var that are described by \p RegNo using that entry. 
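The comment above about storing indices rather than pointers is the usual vector-reallocation hazard. A small self-contained demonstration (plain std::vector here, but a SmallVector behaves the same way once it spills to the heap):

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<int> Entries{42};
  std::size_t Idx = 0;        // an index stays valid across growth
  int *Ptr = &Entries[0];     // a pointer may dangle after growth
  for (int I = 0; I < 1000; ++I)
    Entries.push_back(I);     // almost certainly reallocates the storage
  assert(Entries[Idx] == 42); // the index still names the same entry
  (void)Ptr;                  // dereferencing Ptr now would be UB
}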
+static void clobberRegEntries(InlinedEntity Var, unsigned RegNo, + const MachineInstr &ClobberingInstr, + DbgValueEntriesMap &LiveEntries, + DbgValueHistoryMap &HistMap) { + EntryIndex ClobberIndex = HistMap.startClobber(Var, ClobberingInstr); + + // Close all entries whose values are described by the register. + SmallVector<EntryIndex, 4> IndicesToErase; + for (auto Index : LiveEntries[Var]) { + auto &Entry = HistMap.getEntry(Var, Index); + assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries"); + if (isDescribedByReg(*Entry.getInstr()) == RegNo) { + IndicesToErase.push_back(Index); + Entry.endEntry(ClobberIndex); + } + } + + // Drop all entries that have ended. + for (auto Index : IndicesToErase) + LiveEntries[Var].erase(Index); +} + +/// Add a new debug value for \p Var. Closes all overlapping debug values. +static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV, + RegDescribedVarsMap &RegVars, + DbgValueEntriesMap &LiveEntries, + DbgValueHistoryMap &HistMap) { + EntryIndex NewIndex; + if (HistMap.startDbgValue(Var, DV, NewIndex)) { + SmallDenseMap<unsigned, bool, 4> TrackedRegs; + + // If we have created a new debug value entry, close all preceding + // live entries that overlap. + SmallVector<EntryIndex, 4> IndicesToErase; + const DIExpression *DIExpr = DV.getDebugExpression(); + for (auto Index : LiveEntries[Var]) { + auto &Entry = HistMap.getEntry(Var, Index); + assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries"); + const MachineInstr &DV = *Entry.getInstr(); + bool Overlaps = DIExpr->fragmentsOverlap(DV.getDebugExpression()); + if (Overlaps) { + IndicesToErase.push_back(Index); + Entry.endEntry(NewIndex); + } + if (unsigned Reg = isDescribedByReg(DV)) + TrackedRegs[Reg] |= !Overlaps; + } + + // If the new debug value is described by a register, add tracking of + // that register if it is not already tracked. + if (unsigned NewReg = isDescribedByReg(DV)) { + if (!TrackedRegs.count(NewReg)) + addRegDescribedVar(RegVars, NewReg, Var); + LiveEntries[Var].insert(NewIndex); + TrackedRegs[NewReg] = true; + } + + // Drop tracking of registers that are no longer used. + for (auto I : TrackedRegs) + if (!I.second) + dropRegDescribedVar(RegVars, I.first, Var); + + // Drop all entries that have ended, and mark the new entry as live. + for (auto Index : IndicesToErase) + LiveEntries[Var].erase(Index); + LiveEntries[Var].insert(NewIndex); + } +} + // Terminate the location range for variables described by register at // @I by inserting @ClobberingInstr to their history. static void clobberRegisterUses(RegDescribedVarsMap &RegVars, RegDescribedVarsMap::iterator I, DbgValueHistoryMap &HistMap, + DbgValueEntriesMap &LiveEntries, const MachineInstr &ClobberingInstr) { // Iterate over all variables described by this register and add this // instruction to their history, clobbering it. for (const auto &Var : I->second) - HistMap.endInstrRange(Var, ClobberingInstr); + clobberRegEntries(Var, I->first, ClobberingInstr, LiveEntries, HistMap); RegVars.erase(I); } @@ -131,115 +220,25 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, // @RegNo by inserting @ClobberingInstr to their history. 
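A reduced form of the overlap test that handleNewDebugValue above relies on when deciding which live entries a new DBG_VALUE closes: two fragments conflict exactly when their bit ranges intersect. This sketch omits the real DIExpression detail that a non-fragment expression covers the whole variable and therefore overlaps everything:

#include <cstdint>

struct Fragment { uint64_t OffsetInBits, SizeInBits; };

// Half-open interval intersection over bit ranges.
bool fragmentsOverlap(const Fragment &A, const Fragment &B) {
  return A.OffsetInBits < B.OffsetInBits + B.SizeInBits &&
         B.OffsetInBits < A.OffsetInBits + A.SizeInBits;
}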
static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, DbgValueHistoryMap &HistMap, + DbgValueEntriesMap &LiveEntries, const MachineInstr &ClobberingInstr) { const auto &I = RegVars.find(RegNo); if (I == RegVars.end()) return; - clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr); -} - -// Returns the first instruction in @MBB which corresponds to -// the function epilogue, or nullptr if @MBB doesn't contain an epilogue. -static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { - auto LastMI = MBB.getLastNonDebugInstr(); - if (LastMI == MBB.end() || !LastMI->isReturn()) - return nullptr; - // Assume that epilogue starts with instruction having the same debug location - // as the return instruction. - DebugLoc LastLoc = LastMI->getDebugLoc(); - auto Res = LastMI; - for (MachineBasicBlock::const_reverse_iterator I = LastMI.getReverse(), - E = MBB.rend(); - I != E; ++I) { - if (I->getDebugLoc() != LastLoc) - return &*Res; - Res = &*I; - } - // If all instructions have the same debug location, assume whole MBB is - // an epilogue. - return &*MBB.begin(); -} - -// Collect registers that are modified in the function body (their -// contents is changed outside of the prologue and epilogue). -static void collectChangingRegs(const MachineFunction *MF, - const TargetRegisterInfo *TRI, - BitVector &Regs) { - for (const auto &MBB : *MF) { - auto FirstEpilogueInst = getFirstEpilogueInst(MBB); - - for (const auto &MI : MBB) { - // Avoid looking at prologue or epilogue instructions. - if (&MI == FirstEpilogueInst) - break; - if (MI.getFlag(MachineInstr::FrameSetup)) - continue; - - // Look for register defs and register masks. Register masks are - // typically on calls and they clobber everything not in the mask. - for (const MachineOperand &MO : MI.operands()) { - // Skip virtual registers since they are handled by the parent. - if (MO.isReg() && MO.isDef() && MO.getReg() && - !TRI->isVirtualRegister(MO.getReg())) { - for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); - ++AI) - Regs.set(*AI); - } else if (MO.isRegMask()) { - Regs.setBitsNotInMask(MO.getRegMask()); - } - } - } - } + clobberRegisterUses(RegVars, I, HistMap, LiveEntries, ClobberingInstr); } void llvm::calculateDbgEntityHistory(const MachineFunction *MF, const TargetRegisterInfo *TRI, DbgValueHistoryMap &DbgValues, DbgLabelInstrMap &DbgLabels) { - BitVector ChangingRegs(TRI->getNumRegs()); - collectChangingRegs(MF, TRI, ChangingRegs); - const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + unsigned FrameReg = TRI->getFrameRegister(*MF); RegDescribedVarsMap RegVars; + DbgValueEntriesMap LiveEntries; for (const auto &MBB : *MF) { for (const auto &MI : MBB) { - if (!MI.isDebugInstr()) { - // Not a DBG_VALUE instruction. It may clobber registers which describe - // some variables. - for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.isDef() && MO.getReg()) { - // Ignore call instructions that claim to clobber SP. The AArch64 - // backend does this for aggregate function arguments. - if (MI.isCall() && MO.getReg() == SP) - continue; - // If this is a virtual register, only clobber it since it doesn't - // have aliases. - if (TRI->isVirtualRegister(MO.getReg())) - clobberRegisterUses(RegVars, MO.getReg(), DbgValues, MI); - // If this is a register def operand, it may end a debug value - // range. 
- else { - for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); - ++AI) - if (ChangingRegs.test(*AI)) - clobberRegisterUses(RegVars, *AI, DbgValues, MI); - } - } else if (MO.isRegMask()) { - // If this is a register mask operand, clobber all debug values in - // non-CSRs. - for (unsigned I : ChangingRegs.set_bits()) { - // Don't consider SP to be clobbered by register masks. - if (unsigned(I) != SP && TRI->isPhysicalRegister(I) && - MO.clobbersPhysReg(I)) { - clobberRegisterUses(RegVars, I, DbgValues, MI); - } - } - } - } - continue; - } - if (MI.isDebugValue()) { assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); // Use the base variable (without any DW_OP_piece expressions) @@ -250,13 +249,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, "Expected inlined-at fields to agree"); InlinedEntity Var(RawVar, MI.getDebugLoc()->getInlinedAt()); - if (unsigned PrevReg = DbgValues.getRegisterForVar(Var)) - dropRegDescribedVar(RegVars, PrevReg, Var); - - DbgValues.startInstrRange(Var, MI); - - if (unsigned NewReg = isDescribedByReg(MI)) - addRegDescribedVar(RegVars, NewReg, Var); + handleNewDebugValue(Var, MI, RegVars, LiveEntries, DbgValues); } else if (MI.isDebugLabel()) { assert(MI.getNumOperands() == 1 && "Invalid DBG_LABEL instruction!"); const DILabel *RawLabel = MI.getDebugLabel(); @@ -268,18 +261,75 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, InlinedEntity L(RawLabel, MI.getDebugLoc()->getInlinedAt()); DbgLabels.addInstr(L, MI); } - } - // Make sure locations for register-described variables are valid only - // until the end of the basic block (unless it's the last basic block, in - // which case let their liveness run off to the end of the function). + if (MI.isDebugInstr()) + continue; + + // Not a DBG_VALUE instruction. It may clobber registers which describe + // some variables. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg()) { + // Ignore call instructions that claim to clobber SP. The AArch64 + // backend does this for aggregate function arguments. + if (MI.isCall() && MO.getReg() == SP) + continue; + // If this is a virtual register, only clobber it since it doesn't + // have aliases. + if (TRI->isVirtualRegister(MO.getReg())) + clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries, + MI); + // If this is a register def operand, it may end a debug value + // range. Ignore frame-register defs in the epilogue and prologue, + // we expect debuggers to understand that stack-locations are + // invalid outside of the function body. + else if (MO.getReg() != FrameReg || + (!MI.getFlag(MachineInstr::FrameDestroy) && + !MI.getFlag(MachineInstr::FrameSetup))) { + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); + ++AI) + clobberRegisterUses(RegVars, *AI, DbgValues, LiveEntries, MI); + } + } else if (MO.isRegMask()) { + // If this is a register mask operand, clobber all debug values in + // non-CSRs. + SmallVector<unsigned, 32> RegsToClobber; + // Don't consider SP to be clobbered by register masks. + for (auto It : RegVars) { + unsigned int Reg = It.first; + if (Reg != SP && TRI->isPhysicalRegister(Reg) && + MO.clobbersPhysReg(Reg)) + RegsToClobber.push_back(Reg); + } + + for (unsigned Reg : RegsToClobber) { + clobberRegisterUses(RegVars, Reg, DbgValues, LiveEntries, MI); + } + } + } // End MO loop. + } // End instr loop. 
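The register-mask branch above deliberately fills RegsToClobber before erasing anything: clobberRegisterUses removes entries from RegVars, and mutating a map while iterating it invalidates the active iterator. A sketch of the same two-phase shape over a plain std::map (the callback stands in for MachineOperand::clobbersPhysReg):

#include <functional>
#include <map>
#include <vector>

void clobberByMask(std::map<unsigned, int> &RegVars, unsigned SP,
                   const std::function<bool(unsigned)> &ClobbersPhysReg) {
  // Phase 1: read-only scan, never touching the map's structure.
  std::vector<unsigned> RegsToClobber;
  for (const auto &It : RegVars)
    if (It.first != SP && ClobbersPhysReg(It.first)) // SP is never clobbered
      RegsToClobber.push_back(It.first);
  // Phase 2: safe to erase now that iteration is done.
  for (unsigned Reg : RegsToClobber)
    RegVars.erase(Reg); // stands in for clobberRegisterUses(...)
}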
+ + // Make sure locations for all variables are valid only until the end of + // the basic block (unless it's the last basic block, in which case let + // their liveness run off to the end of the function). if (!MBB.empty() && &MBB != &MF->back()) { - for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) { - auto CurElem = I++; // CurElem can be erased below. - if (TRI->isVirtualRegister(CurElem->first) || - ChangingRegs.test(CurElem->first)) - clobberRegisterUses(RegVars, CurElem, DbgValues, MBB.back()); + // Iterate over all variables that have open debug values. + for (auto &Pair : LiveEntries) { + if (Pair.second.empty()) + continue; + + // Create a clobbering entry. + EntryIndex ClobIdx = DbgValues.startClobber(Pair.first, MBB.back()); + + // End all entries. + for (EntryIndex Idx : Pair.second) { + DbgValueHistoryMap::Entry &Ent = DbgValues.getEntry(Pair.first, Idx); + assert(Ent.isDbgValue() && !Ent.isClosed()); + Ent.endEntry(ClobIdx); + } } + + LiveEntries.clear(); + RegVars.clear(); } } } @@ -289,7 +339,7 @@ LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const { dbgs() << "DbgValueHistoryMap:\n"; for (const auto &VarRangePair : *this) { const InlinedEntity &Var = VarRangePair.first; - const InstrRanges &Ranges = VarRangePair.second; + const Entries &Entries = VarRangePair.second; const DILocalVariable *LocalVar = cast<DILocalVariable>(Var.first); const DILocation *Location = Var.second; @@ -304,10 +354,20 @@ LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const { dbgs() << " --\n"; - for (const InstrRange &Range : Ranges) { - dbgs() << " Begin: " << *Range.first; - if (Range.second) - dbgs() << " End : " << *Range.second; + for (const auto &E : enumerate(Entries)) { + const auto &Entry = E.value(); + dbgs() << " Entry[" << E.index() << "]: "; + if (Entry.isDbgValue()) + dbgs() << "Debug value\n"; + else + dbgs() << "Clobber\n"; + dbgs() << " Instr: " << *Entry.getInstr(); + if (Entry.isDbgValue()) { + if (Entry.getEndIndex() == NoEntry) + dbgs() << " - Valid until end of function\n"; + else + dbgs() << " - Closed by Entry[" << Entry.getEndIndex() << "]\n"; + } dbgs() << "\n"; } } diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 551cd36d1984..22f458e4b03e 100644 --- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -1,9 +1,8 @@ //===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp -------*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -141,10 +140,9 @@ DebugHandlerBase::getFunctionLocalOffsetAfterInsn(const MachineInstr *MI) { } /// If this type is derived from a base type then return base type size. 
-uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) { - DIType *Ty = TyRef.resolve(); +uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) { assert(Ty); - DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty); + const DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty); if (!DDTy) return Ty->getSizeInBits(); @@ -155,7 +153,7 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) { Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type) return DDTy->getSizeInBits(); - DIType *BaseType = DDTy->getBaseType().resolve(); + DIType *BaseType = DDTy->getBaseType(); if (!BaseType) return 0; @@ -212,36 +210,58 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { // Request labels for the full history. for (const auto &I : DbgValues) { - const auto &Ranges = I.second; - if (Ranges.empty()) + const auto &Entries = I.second; + if (Entries.empty()) continue; - // The first mention of a function argument gets the CurrentFnBegin - // label, so arguments are visible when breaking at function entry. - const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable(); + auto IsDescribedByReg = [](const MachineInstr *MI) { + return MI->getOperand(0).isReg() && MI->getOperand(0).getReg(); + }; + + // The first mention of a function argument gets the CurrentFnBegin label, + // so arguments are visible when breaking at function entry. + // + // We do not change the label for values that are described by registers, + // as that could place them above their defining instructions. We should + // ideally not change the labels for constant debug values either, since + // doing that violates the ranges that are calculated in the history map. + // However, we currently do not emit debug values for constant arguments + // directly at the start of the function, so this code is still useful. + const DILocalVariable *DIVar = + Entries.front().getInstr()->getDebugVariable(); if (DIVar->isParameter() && getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) { - LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); - if (Ranges.front().first->getDebugExpression()->isFragment()) { + if (!IsDescribedByReg(Entries.front().getInstr())) + LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin(); + if (Entries.front().getInstr()->getDebugExpression()->isFragment()) { // Mark all non-overlapping initial fragments. - for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { - const DIExpression *Fragment = I->first->getDebugExpression(); - if (std::all_of(Ranges.begin(), I, - [&](DbgValueHistoryMap::InstrRange Pred) { - return !Fragment->fragmentsOverlap( - Pred.first->getDebugExpression()); + for (auto I = Entries.begin(); I != Entries.end(); ++I) { + if (!I->isDbgValue()) + continue; + const DIExpression *Fragment = I->getInstr()->getDebugExpression(); + if (std::any_of(Entries.begin(), I, + [&](DbgValueHistoryMap::Entry Pred) { + return Pred.isDbgValue() && + Fragment->fragmentsOverlap( + Pred.getInstr()->getDebugExpression()); })) - LabelsBeforeInsn[I->first] = Asm->getFunctionBegin(); - else break; + // The code that generates location lists for DWARF assumes that the + // entries' start labels are monotonically increasing, and since we + // don't change the label for fragments that are described by + // registers, we must bail out when encountering such a fragment. 
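The fragment walk above, together with the bail-out just below it, only relabels a prefix of the entries: it stops at the first fragment that overlaps a predecessor or is described by a register. A compressed stand-alone version of that prefix scan (Frag is a toy stand-in for a DIExpression fragment):

#include <algorithm>
#include <cstddef>
#include <vector>

struct Frag {
  unsigned Off, Size; // bit range of the fragment
  bool ByReg;         // described by a register?
};

static bool overlap(const Frag &A, const Frag &B) {
  return A.Off < B.Off + B.Size && B.Off < A.Off + A.Size;
}

// Count the leading fragments that may take the function-begin label,
// making the parameter visible when breaking on function entry.
std::size_t countEntryVisibleFragments(const std::vector<Frag> &Frags) {
  std::size_t N = 0;
  for (auto I = Frags.begin(); I != Frags.end(); ++I) {
    if (std::any_of(Frags.begin(), I,
                    [&](const Frag &P) { return overlap(P, *I); }))
      break; // an overlapping predecessor ends the usable prefix
    if (I->ByReg)
      break; // register locations must keep their original labels
    ++N;
  }
  return N;
}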
+ if (IsDescribedByReg(I->getInstr())) + break; + LabelsBeforeInsn[I->getInstr()] = Asm->getFunctionBegin(); } } } - for (const auto &Range : Ranges) { - requestLabelBeforeInsn(Range.first); - if (Range.second) - requestLabelAfterInsn(Range.second); + for (const auto &Entry : Entries) { + if (Entry.isDbgValue()) + requestLabelBeforeInsn(Entry.getInstr()); + else + requestLabelAfterInsn(Entry.getInstr()); } } diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h index befa4b941c8d..17e39b3d3268 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/DebugLocEntry.h - Entry in debug_loc list -*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -21,6 +20,73 @@ namespace llvm { class AsmPrinter; +/// A single location or constant. +class DbgValueLoc { + /// Any complex address location expression for this DbgValueLoc. + const DIExpression *Expression; + + /// Type of entry that this represents. + enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; + enum EntryType EntryKind; + + /// Either a constant, + union { + int64_t Int; + const ConstantFP *CFP; + const ConstantInt *CIP; + } Constant; + + /// Or a location in the machine frame. + MachineLocation Loc; + +public: + DbgValueLoc(const DIExpression *Expr, int64_t i) + : Expression(Expr), EntryKind(E_Integer) { + Constant.Int = i; + } + DbgValueLoc(const DIExpression *Expr, const ConstantFP *CFP) + : Expression(Expr), EntryKind(E_ConstantFP) { + Constant.CFP = CFP; + } + DbgValueLoc(const DIExpression *Expr, const ConstantInt *CIP) + : Expression(Expr), EntryKind(E_ConstantInt) { + Constant.CIP = CIP; + } + DbgValueLoc(const DIExpression *Expr, MachineLocation Loc) + : Expression(Expr), EntryKind(E_Location), Loc(Loc) { + assert(cast<DIExpression>(Expr)->isValid()); + } + + bool isLocation() const { return EntryKind == E_Location; } + bool isInt() const { return EntryKind == E_Integer; } + bool isConstantFP() const { return EntryKind == E_ConstantFP; } + bool isConstantInt() const { return EntryKind == E_ConstantInt; } + int64_t getInt() const { return Constant.Int; } + const ConstantFP *getConstantFP() const { return Constant.CFP; } + const ConstantInt *getConstantInt() const { return Constant.CIP; } + MachineLocation getLoc() const { return Loc; } + bool isFragment() const { return getExpression()->isFragment(); } + bool isEntryVal() const { return getExpression()->isEntryValue(); } + const DIExpression *getExpression() const { return Expression; } + friend bool operator==(const DbgValueLoc &, const DbgValueLoc &); + friend bool operator<(const DbgValueLoc &, const DbgValueLoc &); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() const { + if (isLocation()) { + llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " "; + if (Loc.isIndirect()) + llvm::dbgs() << "+0"; + llvm::dbgs() << "} "; + } else if (isConstantInt()) + Constant.CIP->dump(); + else if (isConstantFP()) + Constant.CFP->dump(); + if (Expression) + Expression->dump(); + } +#endif +}; + /// This struct 
describes location entries emitted in the .debug_loc /// section. class DebugLocEntry { @@ -28,90 +94,20 @@ class DebugLocEntry { const MCSymbol *Begin; const MCSymbol *End; -public: - /// A single location or constant. - struct Value { - Value(const DIExpression *Expr, int64_t i) - : Expression(Expr), EntryKind(E_Integer) { - Constant.Int = i; - } - Value(const DIExpression *Expr, const ConstantFP *CFP) - : Expression(Expr), EntryKind(E_ConstantFP) { - Constant.CFP = CFP; - } - Value(const DIExpression *Expr, const ConstantInt *CIP) - : Expression(Expr), EntryKind(E_ConstantInt) { - Constant.CIP = CIP; - } - Value(const DIExpression *Expr, MachineLocation Loc) - : Expression(Expr), EntryKind(E_Location), Loc(Loc) { - assert(cast<DIExpression>(Expr)->isValid()); - } - - /// Any complex address location expression for this Value. - const DIExpression *Expression; - - /// Type of entry that this represents. - enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; - enum EntryType EntryKind; - - /// Either a constant, - union { - int64_t Int; - const ConstantFP *CFP; - const ConstantInt *CIP; - } Constant; - - // Or a location in the machine frame. - MachineLocation Loc; - - bool isLocation() const { return EntryKind == E_Location; } - bool isInt() const { return EntryKind == E_Integer; } - bool isConstantFP() const { return EntryKind == E_ConstantFP; } - bool isConstantInt() const { return EntryKind == E_ConstantInt; } - int64_t getInt() const { return Constant.Int; } - const ConstantFP *getConstantFP() const { return Constant.CFP; } - const ConstantInt *getConstantInt() const { return Constant.CIP; } - MachineLocation getLoc() const { return Loc; } - bool isFragment() const { return getExpression()->isFragment(); } - const DIExpression *getExpression() const { return Expression; } - friend bool operator==(const Value &, const Value &); - friend bool operator<(const Value &, const Value &); -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const { - if (isLocation()) { - llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " "; - if (Loc.isIndirect()) - llvm::dbgs() << "+0"; - llvm::dbgs() << "} "; - } - else if (isConstantInt()) - Constant.CIP->dump(); - else if (isConstantFP()) - Constant.CFP->dump(); - if (Expression) - Expression->dump(); - } -#endif - }; - -private: /// A nonempty list of locations/constants belonging to this entry, /// sorted by offset. - SmallVector<Value, 1> Values; + SmallVector<DbgValueLoc, 1> Values; public: - DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val) - : Begin(B), End(E) { - Values.push_back(std::move(Val)); + /// Create a location list entry for the range [\p Begin, \p End). + /// + /// \param Vals One or more values describing (parts of) the variable. + DebugLocEntry(const MCSymbol *Begin, const MCSymbol *End, + ArrayRef<DbgValueLoc> Vals) + : Begin(Begin), End(End) { + addValues(Vals); } - /// If this and Next are describing different pieces of the same - /// variable, merge them by appending Next's values to the current - /// list of values. - /// Return true if the merge was successful. - bool MergeValues(const DebugLocEntry &Next); - /// Attempt to merge this DebugLocEntry with Next and return /// true if the merge was successful. 
Entries can be merged if they /// share the same Loc/Constant and if Next immediately follows this @@ -127,35 +123,36 @@ public: const MCSymbol *getBeginSym() const { return Begin; } const MCSymbol *getEndSym() const { return End; } - ArrayRef<Value> getValues() const { return Values; } - void addValues(ArrayRef<DebugLocEntry::Value> Vals) { + ArrayRef<DbgValueLoc> getValues() const { return Values; } + void addValues(ArrayRef<DbgValueLoc> Vals) { Values.append(Vals.begin(), Vals.end()); sortUniqueValues(); - assert(all_of(Values, [](DebugLocEntry::Value V) { - return V.isFragment(); - }) && "value must be a piece"); + assert((Values.size() == 1 || all_of(Values, [](DbgValueLoc V) { + return V.isFragment(); + })) && "must either have a single value or multiple pieces"); } // Sort the pieces by offset. // Remove any duplicate entries by dropping all but the first. void sortUniqueValues() { llvm::sort(Values); - Values.erase( - std::unique( - Values.begin(), Values.end(), [](const Value &A, const Value &B) { - return A.getExpression() == B.getExpression(); - }), - Values.end()); + Values.erase(std::unique(Values.begin(), Values.end(), + [](const DbgValueLoc &A, const DbgValueLoc &B) { + return A.getExpression() == B.getExpression(); + }), + Values.end()); } /// Lower this entry into a DWARF expression. - void finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List, - const DIBasicType *BT); + void finalize(const AsmPrinter &AP, + DebugLocStream::ListBuilder &List, + const DIBasicType *BT, + DwarfCompileUnit &TheCU); }; -/// Compare two Values for equality. -inline bool operator==(const DebugLocEntry::Value &A, - const DebugLocEntry::Value &B) { +/// Compare two DbgValueLocs for equality. +inline bool operator==(const DbgValueLoc &A, + const DbgValueLoc &B) { if (A.EntryKind != B.EntryKind) return false; @@ -163,21 +160,21 @@ inline bool operator==(const DebugLocEntry::Value &A, return false; switch (A.EntryKind) { - case DebugLocEntry::Value::E_Location: + case DbgValueLoc::E_Location: return A.Loc == B.Loc; - case DebugLocEntry::Value::E_Integer: + case DbgValueLoc::E_Integer: return A.Constant.Int == B.Constant.Int; - case DebugLocEntry::Value::E_ConstantFP: + case DbgValueLoc::E_ConstantFP: return A.Constant.CFP == B.Constant.CFP; - case DebugLocEntry::Value::E_ConstantInt: + case DbgValueLoc::E_ConstantInt: return A.Constant.CIP == B.Constant.CIP; } llvm_unreachable("unhandled EntryKind"); } /// Compare two fragments based on their offset. -inline bool operator<(const DebugLocEntry::Value &A, - const DebugLocEntry::Value &B) { +inline bool operator<(const DbgValueLoc &A, + const DbgValueLoc &B) { return A.getExpression()->getFragmentInfo()->OffsetInBits < B.getExpression()->getFragmentInfo()->OffsetInBits; } diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/lib/CodeGen/AsmPrinter/DebugLocStream.cpp index 7e8ed7104af3..f483d532ff07 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocStream.cpp +++ b/lib/CodeGen/AsmPrinter/DebugLocStream.cpp @@ -1,9 +1,8 @@ //===- DebugLocStream.cpp - DWARF debug_loc stream --------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h index 8dcf5cbc1889..789291771b5a 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -1,9 +1,8 @@ //===--- lib/CodeGen/DebugLocStream.h - DWARF debug_loc stream --*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 1990456cc555..207a7284dafa 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -1,9 +1,8 @@ //===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 1dca3f0fce5b..9548ad9918c1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,6 +17,7 @@ #include "DwarfUnit.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -104,7 +104,7 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) { // extend .file to support this. unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 
0 : getUniqueID(); if (!File) - return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", nullptr, None, CUID); + return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", None, None, CUID); return Asm->OutStreamer->EmitDwarfFileDirective( 0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File), File->getSource(), CUID); @@ -119,17 +119,19 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( assert(GV); auto *GVContext = GV->getScope(); - auto *GTy = DD->resolve(GV->getType()); + const DIType *GTy = GV->getType(); // Construct the context before querying for the existence of the DIE in // case such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(GVContext); + auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr; + DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs) + : getOrCreateContextDIE(GVContext); // Add to map. DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV); DIScope *DeclContext; if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) { - DeclContext = resolve(SDMDecl->getScope()); + DeclContext = SDMDecl->getScope(); assert(SDMDecl->isStaticMember() && "Expected static member decl"); assert(GV->isDefinition()); // We need the declaration DIE that is in the static member's class. @@ -137,7 +139,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE); // If the global variable's type is different from the one in the class // member type, assume that it's more specific and also emit it. - if (GTy != DD->resolve(SDMDecl->getBaseType())) + if (GTy != SDMDecl->getBaseType()) addType(*VariableDIE, GTy); } else { DeclContext = GV->getScope(); @@ -166,8 +168,16 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( addTemplateParams(*VariableDIE, DINodeArray(TP)); // Add location. + addLocationAttribute(VariableDIE, GV, GlobalExprs); + + return VariableDIE; +} + +void DwarfCompileUnit::addLocationAttribute( + DIE *VariableDIE, const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) { bool addToAccelTable = false; DIELoc *Loc = nullptr; + Optional<unsigned> NVPTXAddressSpace; std::unique_ptr<DIEDwarfExpression> DwarfExpr; for (const auto &GE : GlobalExprs) { const GlobalVariable *Global = GE.Var; @@ -201,8 +211,24 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc); } - if (Expr) + if (Expr) { + // According to + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + // cuda-gdb requires DW_AT_address_class for all variables to be able to + // correctly interpret address space of the variable address. + // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef + // sequence for the NVPTX + gdb target. 
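// Illustrative sketch (hypothetical input, not part of this patch): given
//   DW_OP_constu 5, DW_OP_swap, DW_OP_xderef, DW_OP_plus_uconst 8
// extractAddressClass() is expected to strip the three-operator
// address-class prefix, returning the equivalent of
//   DW_OP_plus_uconst 8
// and reporting the address space (here 5, cuda-gdb's global class)
// through the out-parameter.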
+ unsigned LocalNVPTXAddressSpace; + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) { + const DIExpression *NewExpr = + DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace); + if (NewExpr != Expr) { + Expr = NewExpr; + NVPTXAddressSpace = LocalNVPTXAddressSpace; + } + } DwarfExpr->addFragmentOffset(Expr); + } if (Global) { const MCSymbol *Sym = Asm->getSymbol(Global); @@ -247,6 +273,15 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( DwarfExpr->setMemoryLocationKind(); DwarfExpr->addExpression(Expr); } + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) { + // According to + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + // cuda-gdb requires DW_AT_address_class for all variables to be able to + // correctly interpret address space of the variable address. + const unsigned NVPTX_ADDR_global_space = 5; + addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, + NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_global_space); + } if (Loc) addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize()); @@ -262,8 +297,25 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( DD->useAllLinkageNames()) DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE); } +} - return VariableDIE; +DIE *DwarfCompileUnit::getOrCreateCommonBlock( + const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(CB->getScope()); + + if (DIE *NDie = getDIE(CB)) + return NDie; + DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB); + StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName(); + addString(NDie, dwarf::DW_AT_name, Name); + addGlobalName(Name, NDie, CB->getScope()); + if (CB->getFile()) + addSourceLine(NDie, CB->getLineNo(), CB->getFile()); + if (DIGlobalVariable *V = CB->getDecl()) + getCU().addLocationAttribute(&NDie, V, GlobalExprs); + return &NDie; } void DwarfCompileUnit::addRange(RangeSpan Range) { @@ -491,6 +543,8 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, getOrCreateSourceID(IA->getFile())); addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); + if (IA->getColumn()) + addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn()); if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4) addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None, IA->getDiscriminator()); @@ -555,36 +609,27 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, return VariableDie; } - // Check if variable is described by a DBG_VALUE instruction. - if (const MachineInstr *DVInsn = DV.getMInsn()) { - assert(DVInsn->getNumOperands() == 4); - if (DVInsn->getOperand(0).isReg()) { - auto RegOp = DVInsn->getOperand(0); - auto Op1 = DVInsn->getOperand(1); - // If the second operand is an immediate, this is an indirect value. - assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset"); - MachineLocation Location(RegOp.getReg(), Op1.isImm()); - addVariableAddress(DV, *VariableDie, Location); - } else if (DVInsn->getOperand(0).isImm()) { - // This variable is described by a single constant. - // Check whether it has a DIExpression. + // Check if variable has a single location description. 
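// For orientation, the dispatch below maps each DbgValueLoc kind onto a
// DWARF encoding roughly as follows (a summary, not normative):
//   isLocation()    -> DW_AT_location built from the machine reg/mem location
//   isInt()         -> DW_AT_const_value, or a constant location expression
//                      when a non-empty DIExpression is attached
//   isConstantFP()  -> DW_AT_const_value (floating-point payload)
//   isConstantInt() -> DW_AT_const_value (arbitrary-width integer payload)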
+ if (auto *DVal = DV.getValueLoc()) { + if (DVal->isLocation()) + addVariableAddress(DV, *VariableDie, DVal->getLoc()); + else if (DVal->isInt()) { auto *Expr = DV.getSingleExpression(); if (Expr && Expr->getNumElements()) { DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); // If there is an expression, emit raw unsigned bytes. DwarfExpr.addFragmentOffset(Expr); - DwarfExpr.addUnsignedConstant(DVInsn->getOperand(0).getImm()); + DwarfExpr.addUnsignedConstant(DVal->getInt()); DwarfExpr.addExpression(Expr); addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); } else - addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType()); - } else if (DVInsn->getOperand(0).isFPImm()) - addConstantFPValue(*VariableDie, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isCImm()) - addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(), - DV.getType()); - + addConstantValue(*VariableDie, DVal->getInt(), DV.getType()); + } else if (DVal->isConstantFP()) { + addConstantFPValue(*VariableDie, DVal->getConstantFP()); + } else if (DVal->isConstantInt()) { + addConstantValue(*VariableDie, DVal->getConstantInt(), DV.getType()); + } return VariableDie; } @@ -592,6 +637,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, if (!DV.hasFrameIndexExprs()) return VariableDie; + Optional<unsigned> NVPTXAddressSpace; DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); for (auto &Fragment : DV.getFrameIndexExprs()) { @@ -603,7 +649,23 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, SmallVector<uint64_t, 8> Ops; Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Offset); - Ops.append(Expr->elements_begin(), Expr->elements_end()); + // According to + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + // cuda-gdb requires DW_AT_address_class for all variables to be able to + // correctly interpret address space of the variable address. + // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef + // sequence for the NVPTX + gdb target. + unsigned LocalNVPTXAddressSpace; + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) { + const DIExpression *NewExpr = + DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace); + if (NewExpr != Expr) { + Expr = NewExpr; + NVPTXAddressSpace = LocalNVPTXAddressSpace; + } + } + if (Expr) + Ops.append(Expr->elements_begin(), Expr->elements_end()); DIExpressionCursor Cursor(Ops); DwarfExpr.setMemoryLocationKind(); if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol()) @@ -613,7 +675,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg); DwarfExpr.addExpression(std::move(Cursor)); } + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) { + // According to + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + // cuda-gdb requires DW_AT_address_class for all variables to be able to + // correctly interpret address space of the variable address. + const unsigned NVPTX_ADDR_local_space = 6; + addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, + NVPTXAddressSpace ? 
*NVPTXAddressSpace : NVPTX_ADDR_local_space); + } addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); + if (DwarfExpr.TagOffset) + addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1, + *DwarfExpr.TagOffset); return VariableDie; } @@ -800,7 +874,7 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( ContextDIE = &getUnitDie(); getOrCreateSubprogramDIE(SPDecl); } else { - ContextDIE = getOrCreateContextDIE(resolve(SP->getScope())); + ContextDIE = getOrCreateContextDIE(SP->getScope()); // The scope may be shared with a subprogram that has already been // constructed in another CU, in which case we need to construct this // subprogram in the same CU. @@ -849,7 +923,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE( DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag()); insertDIE(Module, IMDie); DIE *EntityDie; - auto *Entity = resolve(Module->getEntity()); + auto *Entity = Module->getEntity(); if (auto *NS = dyn_cast<DINamespace>(Entity)) EntityDie = getOrCreateNameSpace(NS); else if (auto *M = dyn_cast<DIModule>(Entity)) @@ -958,7 +1032,9 @@ bool DwarfCompileUnit::hasDwarfPubSections() const { return true; case DICompileUnit::DebugNameTableKind::Default: return DD->tuneForGDB() && !includeMinimalInlineScopes() && - !CUNode->isDebugDirectivesOnly(); + !CUNode->isDebugDirectivesOnly() && + DD->getAccelTableKind() != AccelTableKind::Apple && + DD->getDwarfVersion() < 5; } llvm_unreachable("Unhandled DICompileUnit::DebugNameTableKind enum"); } @@ -1054,6 +1130,12 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, DwarfExpr.setMemoryLocationKind(); DIExpressionCursor Cursor(DIExpr); + + if (DIExpr->isEntryValue()) { + DwarfExpr.setEntryValueFlag(); + DwarfExpr.addEntryValueExpression(Cursor); + } + const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) return; @@ -1112,7 +1194,7 @@ void DwarfCompileUnit::addAddressExpr(DIE &Die, dwarf::Attribute Attribute, void DwarfCompileUnit::applySubprogramAttributesToDefinition( const DISubprogram *SP, DIE &SPDie) { auto *SPDecl = SP->getDeclaration(); - auto *Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope()); + auto *Context = SPDecl ? SPDecl->getScope() : SP->getScope(); applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes()); addGlobalName(SP->getName(), SPDie, Context); } @@ -1121,6 +1203,10 @@ bool DwarfCompileUnit::isDwoUnit() const { return DD->useSplitDwarf() && Skeleton; } +void DwarfCompileUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { + constructTypeDIE(D, CTy); +} + bool DwarfCompileUnit::includeMinimalInlineScopes() const { return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly || (DD->useSplitDwarf() && !Skeleton); @@ -1134,3 +1220,27 @@ void DwarfCompileUnit::addAddrTableBase() { : dwarf::DW_AT_GNU_addr_base, Label, TLOF.getDwarfAddrSection()->getBeginSymbol()); } + +void DwarfCompileUnit::addBaseTypeRef(DIEValueList &Die, int64_t Idx) { + Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, dwarf::DW_FORM_udata, + new (DIEValueAllocator) DIEBaseTypeRef(this, Idx)); +} + +void DwarfCompileUnit::createBaseTypeDIEs() { + // Insert the base_type DIEs directly after the CU so that their offsets will + // fit in the fixed size ULEB128 used inside the location expressions. + // Maintain order by iterating backwards and inserting to the front of CU + // child list. 
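// Illustrative output (not part of this patch): a 32-bit signed entry is
// expected to produce a DIE along the lines of
//   DW_TAG_base_type
//     DW_AT_name      ("DW_ATE_signed_32")
//     DW_AT_encoding  (DW_ATE_signed)
//     DW_AT_byte_size (0x04)
// which DW_OP_convert/DW_OP_reinterpret operands can then reference by
// CU-relative offset.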
+ for (auto &Btr : reverse(ExprRefedBaseTypes)) { + DIE &Die = getUnitDie().addChildFront( + DIE::get(DIEValueAllocator, dwarf::DW_TAG_base_type)); + SmallString<32> Str; + addString(Die, dwarf::DW_AT_name, + Twine(dwarf::AttributeEncodingString(Btr.Encoding) + + "_" + Twine(Btr.BitSize)).toStringRef(Str)); + addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding); + addUInt(Die, dwarf::DW_AT_byte_size, None, Btr.BitSize / 8); + + Btr.Die = &Die; + } +} diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 9ec22f68c12f..ea980dfda17e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -1,9 +1,8 @@ //===- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -101,6 +100,8 @@ class DwarfCompileUnit final : public DwarfUnit { return DU->getAbstractEntities(); } + void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override; + public: DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); @@ -125,11 +126,27 @@ public: const DIExpression *Expr; }; + struct BaseTypeRef { + BaseTypeRef(unsigned BitSize, dwarf::TypeKind Encoding) : + BitSize(BitSize), Encoding(Encoding) {} + unsigned BitSize; + dwarf::TypeKind Encoding; + DIE *Die = nullptr; + }; + + std::vector<BaseTypeRef> ExprRefedBaseTypes; + /// Get or create global variable DIE. DIE * getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs); + DIE *getOrCreateCommonBlock(const DICommonBlock *CB, + ArrayRef<GlobalExpr> GlobalExprs); + + void addLocationAttribute(DIE *ToDIE, const DIGlobalVariable *GV, + ArrayRef<GlobalExpr> GlobalExprs); + /// addLabelAddress - Add a dwarf label attribute data and value using /// either DW_FORM_addr or DW_FORM_GNU_addr_index. void addLabelAddress(DIE &Die, dwarf::Attribute Attribute, @@ -200,6 +217,8 @@ public: SmallVectorImpl<DIE *> &Children, bool *HasNonScopeChildren = nullptr); + void createBaseTypeDIEs(); + /// Construct a DIE for this subprogram scope. DIE &constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope); @@ -314,6 +333,8 @@ public: void setDWOId(uint64_t DwoId) { DWOId = DwoId; } bool hasDwarfPubSections() const; + + void addBaseTypeRef(DIEValueList &Die, int64_t Idx); }; } // end namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1de2ffb6cfa1..71bb2b0858cc 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -42,6 +41,8 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" @@ -162,6 +163,7 @@ static const char *const DWARFGroupName = "dwarf"; static const char *const DWARFGroupDescription = "DWARF Emission"; static const char *const DbgTimerName = "writer"; static const char *const DbgTimerDescription = "DWARF Debug Writer"; +static constexpr unsigned ULEB128PadSize = 4; void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) { BS.EmitInt8( @@ -177,6 +179,15 @@ void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) { BS.EmitULEB128(Value, Twine(Value)); } +void DebugLocDwarfExpression::emitData1(uint8_t Value) { + BS.EmitInt8(Value, Twine(Value)); +} + +void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) { + assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx wont fit"); + BS.EmitULEB128(Idx, Twine(Idx), ULEB128PadSize); +} + bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) { // This information is not available while emitting .debug_loc entries. @@ -185,11 +196,11 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, bool DbgVariable::isBlockByrefVariable() const { assert(getVariable() && "Invalid complex DbgVariable!"); - return getVariable()->getType().resolve()->isBlockByrefStruct(); + return getVariable()->getType()->isBlockByrefStruct(); } const DIType *DbgVariable::getType() const { - DIType *Ty = getVariable()->getType().resolve(); + DIType *Ty = getVariable()->getType(); // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. if (Ty->isBlockByrefStruct()) { @@ -221,18 +232,55 @@ const DIType *DbgVariable::getType() const { uint16_t tag = Ty->getTag(); if (tag == dwarf::DW_TAG_pointer_type) - subType = resolve(cast<DIDerivedType>(Ty)->getBaseType()); + subType = cast<DIDerivedType>(Ty)->getBaseType(); auto Elements = cast<DICompositeType>(subType)->getElements(); for (unsigned i = 0, N = Elements.size(); i < N; ++i) { auto *DT = cast<DIDerivedType>(Elements[i]); if (getName() == DT->getName()) - return resolve(DT->getBaseType()); + return DT->getBaseType(); } } return Ty; } +/// Get .debug_loc entry for the instruction range starting at MI. +static DbgValueLoc getDebugLocValue(const MachineInstr *MI) { + const DIExpression *Expr = MI->getDebugExpression(); + assert(MI->getNumOperands() == 4); + if (MI->getOperand(0).isReg()) { + auto RegOp = MI->getOperand(0); + auto Op1 = MI->getOperand(1); + // If the second operand is an immediate, this is a + // register-indirect address. 
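// Illustrative MIR forms (register names hypothetical):
//   DBG_VALUE $reg0, 0,      !var, !DIExpression()   (var in memory at [$reg0])
//   DBG_VALUE $reg0, $noreg, !var, !DIExpression()   (var in register $reg0)
// i.e. an immediate second operand (required to be 0 here) marks the value
// as register-indirect rather than in-register.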
+ assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset"); + MachineLocation MLoc(RegOp.getReg(), Op1.isImm()); + return DbgValueLoc(Expr, MLoc); + } + if (MI->getOperand(0).isImm()) + return DbgValueLoc(Expr, MI->getOperand(0).getImm()); + if (MI->getOperand(0).isFPImm()) + return DbgValueLoc(Expr, MI->getOperand(0).getFPImm()); + if (MI->getOperand(0).isCImm()) + return DbgValueLoc(Expr, MI->getOperand(0).getCImm()); + + llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!"); +} + +void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) { + assert(FrameIndexExprs.empty() && "Already initialized?"); + assert(!ValueLoc.get() && "Already initialized?"); + + assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable"); + assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() && + "Wrong inlined-at"); + + ValueLoc = llvm::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue)); + if (auto *E = DbgValue->getDebugExpression()) + if (E->getNumElements()) + FrameIndexExprs.push_back({0, E}); +} + ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const { if (FrameIndexExprs.size() == 1) return FrameIndexExprs; @@ -252,8 +300,8 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const { } void DbgVariable::addMMIEntry(const DbgVariable &V) { - assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); - assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry"); + assert(DebugLocListIndex == ~0U && !ValueLoc.get() && "not an MMI entry"); + assert(V.DebugLocListIndex == ~0U && !V.ValueLoc.get() && "not an MMI entry"); assert(V.getVariable() == getVariable() && "conflicting variable"); assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location"); @@ -315,7 +363,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) IsDarwin(A->TM.getTargetTriple().isOSDarwin()) { const Triple &TT = Asm->TM.getTargetTriple(); - // Make sure we know our "debugger tuning." The target option takes + // Make sure we know our "debugger tuning". The target option takes // precedence; fall back to triple-based defaults. if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default) DebuggerTuning = Asm->TM.Options.DebuggerTuning; @@ -658,6 +706,11 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); } + // Create DIEs for function declarations used for call site debug info. + for (auto Scope : DIUnit->getRetainedTypes()) + if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope)) + NewCU.getOrCreateSubprogramDIE(SP); + CUMap.insert({DIUnit, &NewCU}); CUDieMap.insert({&NewCU.getUnitDie(), &NewCU}); return NewCU; @@ -890,13 +943,6 @@ void DwarfDebug::finalizeModuleInfo() { // ranges for all subprogram DIEs for mach-o. DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; - // We don't keep track of which addresses are used in which CU so this - // is a bit pessimistic under LTO. - if (!AddrPool.isEmpty() && - (getDwarfVersion() >= 5 || - (SkCU && !empty(TheCU.getUnitDie().children())))) - U.addAddrTableBase(); - if (unsigned NumRanges = TheCU.getRanges().size()) { if (NumRanges > 1 && useRangesSection()) // A DW_AT_low_pc attribute may also be specified in combination with @@ -909,6 +955,13 @@ void DwarfDebug::finalizeModuleInfo() { U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); } + // We don't keep track of which addresses are used in which CU so this + // is a bit pessimistic under LTO. 
+ if (!AddrPool.isEmpty() && + (getDwarfVersion() >= 5 || + (SkCU && !empty(TheCU.getUnitDie().children())))) + U.addAddrTableBase(); + if (getDwarfVersion() >= 5) { if (U.hasRangeLists()) U.addRnglistsBase(); @@ -941,6 +994,11 @@ void DwarfDebug::endModule() { assert(CurFn == nullptr); assert(CurMI == nullptr); + for (const auto &P : CUMap) { + auto &CU = *P.second; + CU.createBaseTypeDIEs(); + } + // If we aren't actually generating debug info (check beginModule - // conditionalized on !DisableDebugInfoPrinting and the presence of the // llvm.dbg.cu metadata node) @@ -1059,161 +1117,177 @@ void DwarfDebug::collectVariableInfoFromMFTable( } } -// Get .debug_loc entry for the instruction range starting at MI. -static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) { - const DIExpression *Expr = MI->getDebugExpression(); - assert(MI->getNumOperands() == 4); - if (MI->getOperand(0).isReg()) { - auto RegOp = MI->getOperand(0); - auto Op1 = MI->getOperand(1); - // If the second operand is an immediate, this is a - // register-indirect address. - assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset"); - MachineLocation MLoc(RegOp.getReg(), Op1.isImm()); - return DebugLocEntry::Value(Expr, MLoc); - } - if (MI->getOperand(0).isImm()) - return DebugLocEntry::Value(Expr, MI->getOperand(0).getImm()); - if (MI->getOperand(0).isFPImm()) - return DebugLocEntry::Value(Expr, MI->getOperand(0).getFPImm()); - if (MI->getOperand(0).isCImm()) - return DebugLocEntry::Value(Expr, MI->getOperand(0).getCImm()); - - llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!"); -} +/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its +/// enclosing lexical scope. The check ensures there are no other instructions +/// in the same lexical scope preceding the DBG_VALUE and that its range is +/// either open or otherwise rolls off the end of the scope. +static bool validThroughout(LexicalScopes &LScopes, + const MachineInstr *DbgValue, + const MachineInstr *RangeEnd) { + assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location"); + auto MBB = DbgValue->getParent(); + auto DL = DbgValue->getDebugLoc(); + auto *LScope = LScopes.findLexicalScope(DL); + // Scope doesn't exist; this is a dead DBG_VALUE. + if (!LScope) + return false; + auto &LSRange = LScope->getRanges(); + if (LSRange.size() == 0) + return false; -/// If this and Next are describing different fragments of the same -/// variable, merge them by appending Next's values to the current -/// list of values. -/// Return true if the merge was successful. -bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) { - if (Begin == Next.Begin) { - auto *FirstExpr = cast<DIExpression>(Values[0].Expression); - auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression); - if (!FirstExpr->isFragment() || !FirstNextExpr->isFragment()) + // Determine if the DBG_VALUE is valid at the beginning of its lexical block. + const MachineInstr *LScopeBegin = LSRange.front().first; + // Early exit if the lexical scope begins outside of the current block. + if (LScopeBegin->getParent() != MBB) + return false; + MachineBasicBlock::const_reverse_iterator Pred(DbgValue); + for (++Pred; Pred != MBB->rend(); ++Pred) { + if (Pred->getFlag(MachineInstr::FrameSetup)) + break; + auto PredDL = Pred->getDebugLoc(); + if (!PredDL || Pred->isMetaInstruction()) + continue; + // Check whether the instruction preceding the DBG_VALUE is in the same + // (sub)scope as the DBG_VALUE. 
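// E.g. (sketch): any non-meta instruction between the top of the scope and
// the DBG_VALUE, whether in this scope or in a sub-scope it dominates,
// means the variable did not hold this value from the start of the scope,
// so claiming a single location for the whole scope would over-promise.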
+ if (DL->getScope() == PredDL->getScope()) + return false; + auto *PredScope = LScopes.findLexicalScope(PredDL); + if (!PredScope || LScope->dominates(PredScope)) return false; + } - // We can only merge entries if none of the fragments overlap any others. - // In doing so, we can take advantage of the fact that both lists are - // sorted. - for (unsigned i = 0, j = 0; i < Values.size(); ++i) { - for (; j < Next.Values.size(); ++j) { - int res = cast<DIExpression>(Values[i].Expression)->fragmentCmp( - cast<DIExpression>(Next.Values[j].Expression)); - if (res == 0) // The two expressions overlap, we can't merge. - return false; - // Values[i] is entirely before Next.Values[j], - // so go back to the next entry of Values. - else if (res == -1) - break; - // Next.Values[j] is entirely before Values[i], so go on to the - // next entry of Next.Values. - } - } + // If the range of the DBG_VALUE is open-ended, report success. + if (!RangeEnd) + return true; - addValues(Next.Values); - End = Next.End; + // Fail if there are instructions belonging to our scope in another block. + const MachineInstr *LScopeEnd = LSRange.back().second; + if (LScopeEnd->getParent() != MBB) + return false; + + // Single, constant DBG_VALUEs in the prologue are promoted to be live + // throughout the function. This is a hack, presumably for DWARF v2 and not + // necessarily correct. It would be much better to use a dbg.declare instead + // if we know the constant is live throughout the scope. + if (DbgValue->getOperand(0).isImm() && MBB->pred_empty()) return true; - } + return false; } /// Build the location list for all DBG_VALUEs in the function that -/// describe the same variable. If the ranges of several independent -/// fragments of the same variable overlap partially, split them up and -/// combine the ranges. The resulting DebugLocEntries are will have +/// describe the same variable. The resulting DebugLocEntries will have /// strict monotonically increasing begin addresses and will never -/// overlap. +/// overlap. If the resulting list has only one entry that is valid +/// throughout variable's scope return true. +// +// See the definition of DbgValueHistoryMap::Entry for an explanation of the +// different kinds of history map entries. One thing to be aware of is that if +// a debug value is ended by another entry (rather than being valid until the +// end of the function), that entry's instruction may or may not be included in +// the range, depending on if the entry is a clobbering entry (it has an +// instruction that clobbers one or more preceding locations), or if it is an +// (overlapping) debug value entry. This distinction can be seen in the example +// below. The first debug value is ended by the clobbering entry 2, and the +// second and third debug values are ended by the overlapping debug value entry +// 4. // // Input: // -// Ranges History [var, loc, fragment ofs size] -// 0 | [x, (reg0, fragment 0, 32)] -// 1 | | [x, (reg1, fragment 32, 32)] <- IsFragmentOfPrevEntry -// 2 | | ... -// 3 | [clobber reg0] -// 4 [x, (mem, fragment 0, 64)] <- overlapping with both previous fragments of -// x. +// History map entries [type, end index, mi] // -// Output: +// 0 | [DbgValue, 2, DBG_VALUE $reg0, [...] (fragment 0, 32)] +// 1 | | [DbgValue, 4, DBG_VALUE $reg1, [...] (fragment 32, 32)] +// 2 | | [Clobber, $reg0 = [...], -, -] +// 3 | | [DbgValue, 4, DBG_VALUE 123, [...] (fragment 64, 32)] +// 4 [DbgValue, ~0, DBG_VALUE @g, [...] 
(fragment 0, 96)] // -// [0-1] [x, (reg0, fragment 0, 32)] -// [1-3] [x, (reg0, fragment 0, 32), (reg1, fragment 32, 32)] -// [3-4] [x, (reg1, fragment 32, 32)] -// [4- ] [x, (mem, fragment 0, 64)] -void -DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, - const DbgValueHistoryMap::InstrRanges &Ranges) { - SmallVector<DebugLocEntry::Value, 4> OpenRanges; - - for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { - const MachineInstr *Begin = I->first; - const MachineInstr *End = I->second; - assert(Begin->isDebugValue() && "Invalid History entry"); - - // Check if a variable is inaccessible in this range. - if (Begin->getNumOperands() > 1 && - Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) { - OpenRanges.clear(); - continue; - } - - // If this fragment overlaps with any open ranges, truncate them. - const DIExpression *DIExpr = Begin->getDebugExpression(); - auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) { - return DIExpr->fragmentsOverlap(R.getExpression()); - }); +// Output [start, end) [Value...]: +// +// [0-1) [(reg0, fragment 0, 32)] +// [1-3) [(reg0, fragment 0, 32), (reg1, fragment 32, 32)] +// [3-4) [(reg1, fragment 32, 32), (123, fragment 64, 32)] +// [4-) [(@g, fragment 0, 96)] +bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, + const DbgValueHistoryMap::Entries &Entries) { + using OpenRange = + std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>; + SmallVector<OpenRange, 4> OpenRanges; + bool isSafeForSingleLocation = true; + const MachineInstr *StartDebugMI = nullptr; + const MachineInstr *EndMI = nullptr; + + for (auto EB = Entries.begin(), EI = EB, EE = Entries.end(); EI != EE; ++EI) { + const MachineInstr *Instr = EI->getInstr(); + + // Remove all values that are no longer live. + size_t Index = std::distance(EB, EI); + auto Last = + remove_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; }); OpenRanges.erase(Last, OpenRanges.end()); - const MCSymbol *StartLabel = getLabelBeforeInsn(Begin); - assert(StartLabel && "Forgot label before DBG_VALUE starting a range!"); + // If we are dealing with a clobbering entry, this iteration will result in + // a location list entry starting after the clobbering instruction. + const MCSymbol *StartLabel = + EI->isClobber() ? getLabelAfterInsn(Instr) : getLabelBeforeInsn(Instr); + assert(StartLabel && + "Forgot label before/after instruction starting a range!"); const MCSymbol *EndLabel; - if (End != nullptr) - EndLabel = getLabelAfterInsn(End); - else if (std::next(I) == Ranges.end()) + if (std::next(EI) == Entries.end()) { EndLabel = Asm->getFunctionEnd(); + if (EI->isClobber()) + EndMI = EI->getInstr(); + } + else if (std::next(EI)->isClobber()) + EndLabel = getLabelAfterInsn(std::next(EI)->getInstr()); else - EndLabel = getLabelBeforeInsn(std::next(I)->first); + EndLabel = getLabelBeforeInsn(std::next(EI)->getInstr()); assert(EndLabel && "Forgot label after instruction ending a range!"); - LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n"); + if (EI->isDbgValue()) + LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Instr << "\n"); + + // If this history map entry has a debug value, add that to the list of + // open ranges and check if its location is valid for a single value + // location. + if (EI->isDbgValue()) { + // Do not add undef debug values, as they are redundant information in + // the location list entries. An undef debug value results in an empty location + // description.
If there are any non-undef fragments then padding pieces + // with empty location descriptions will automatically be inserted, and if + // all fragments are undef then the whole location list entry is + // redundant. + if (!Instr->isUndefDebugValue()) { + auto Value = getDebugLocValue(Instr); + OpenRanges.emplace_back(EI->getEndIndex(), Value); + + // TODO: Add support for single value fragment locations. + if (Instr->getDebugExpression()->isFragment()) + isSafeForSingleLocation = false; + + if (!StartDebugMI) + StartDebugMI = Instr; + } else { + isSafeForSingleLocation = false; + } + } - auto Value = getDebugLocValue(Begin); + // Location list entries with empty location descriptions are redundant + // information in DWARF, so do not emit those. + if (OpenRanges.empty()) + continue; // Omit entries with empty ranges as they do not have any effect in DWARF. if (StartLabel == EndLabel) { - // If this is a fragment, we must still add the value to the list of - // open ranges, since it may describe non-overlapping parts of the - // variable. - if (DIExpr->isFragment()) - OpenRanges.push_back(Value); LLVM_DEBUG(dbgs() << "Omitting location list entry with empty range.\n"); continue; } - DebugLocEntry Loc(StartLabel, EndLabel, Value); - bool couldMerge = false; - - // If this is a fragment, it may belong to the current DebugLocEntry. - if (DIExpr->isFragment()) { - // Add this value to the list of open ranges. - OpenRanges.push_back(Value); - - // Attempt to add the fragment to the last entry. - if (!DebugLoc.empty()) - if (DebugLoc.back().MergeValues(Loc)) - couldMerge = true; - } - - if (!couldMerge) { - // Need to add a new DebugLocEntry. Add all values from still - // valid non-overlapping fragments. - if (OpenRanges.size()) - Loc.addValues(OpenRanges); - - DebugLoc.push_back(std::move(Loc)); - } + SmallVector<DbgValueLoc, 4> Values; + for (auto &R : OpenRanges) + Values.push_back(R.second); + DebugLoc.emplace_back(StartLabel, EndLabel, Values); // Attempt to coalesce the ranges of two otherwise identical // DebugLocEntries. @@ -1229,6 +1303,9 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry)) DebugLoc.pop_back(); } + + return DebugLoc.size() == 1 && isSafeForSingleLocation && + validThroughout(LScopes, StartDebugMI, EndMI); } DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU, @@ -1253,64 +1330,6 @@ DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU, return ConcreteEntities.back().get(); } -/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its -/// enclosing lexical scope. The check ensures there are no other instructions -/// in the same lexical scope preceding the DBG_VALUE and that its range is -/// either open or otherwise rolls off the end of the scope. -static bool validThroughout(LexicalScopes &LScopes, - const MachineInstr *DbgValue, - const MachineInstr *RangeEnd) { - assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location"); - auto MBB = DbgValue->getParent(); - auto DL = DbgValue->getDebugLoc(); - auto *LScope = LScopes.findLexicalScope(DL); - // Scope doesn't exist; this is a dead DBG_VALUE. - if (!LScope) - return false; - auto &LSRange = LScope->getRanges(); - if (LSRange.size() == 0) - return false; - - // Determine if the DBG_VALUE is valid at the beginning of its lexical block. - const MachineInstr *LScopeBegin = LSRange.front().first; - // Early exit if the lexical scope begins outside of the current block. 
- if (LScopeBegin->getParent() != MBB) - return false; - MachineBasicBlock::const_reverse_iterator Pred(DbgValue); - for (++Pred; Pred != MBB->rend(); ++Pred) { - if (Pred->getFlag(MachineInstr::FrameSetup)) - break; - auto PredDL = Pred->getDebugLoc(); - if (!PredDL || Pred->isMetaInstruction()) - continue; - // Check whether the instruction preceding the DBG_VALUE is in the same - // (sub)scope as the DBG_VALUE. - if (DL->getScope() == PredDL->getScope()) - return false; - auto *PredScope = LScopes.findLexicalScope(PredDL); - if (!PredScope || LScope->dominates(PredScope)) - return false; - } - - // If the range of the DBG_VALUE is open-ended, report success. - if (!RangeEnd) - return true; - - // Fail if there are instructions belonging to our scope in another block. - const MachineInstr *LScopeEnd = LSRange.back().second; - if (LScopeEnd->getParent() != MBB) - return false; - - // Single, constant DBG_VALUEs in the prologue are promoted to be live - // throughout the function. This is a hack, presumably for DWARF v2 and not - // necessarily correct. It would be much better to use a dbg.declare instead - // if we know the constant is live throughout the scope. - if (DbgValue->getOperand(0).isImm() && MBB->pred_empty()) - return true; - - return false; -} - // Find variables for each lexical scope. void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, @@ -1324,8 +1343,8 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, continue; // Instruction ranges, specifying where IV is accessible. - const auto &Ranges = I.second; - if (Ranges.empty()) + const auto &HistoryMapEntries = I.second; + if (HistoryMapEntries.empty()) continue; LexicalScope *Scope = nullptr; @@ -1342,15 +1361,24 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, DbgVariable *RegVar = cast<DbgVariable>(createConcreteEntity(TheCU, *Scope, LocalVar, IV.second)); - const MachineInstr *MInsn = Ranges.front().first; + const MachineInstr *MInsn = HistoryMapEntries.front().getInstr(); assert(MInsn->isDebugValue() && "History must begin with debug value"); // Check if there is a single DBG_VALUE, valid throughout the var's scope. + // If the history map contains a single debug value, there may be an + // additional entry which clobbers the debug value. + size_t HistSize = HistoryMapEntries.size(); + bool SingleValueWithClobber = + HistSize == 2 && HistoryMapEntries[1].isClobber(); + if (HistSize == 1 || SingleValueWithClobber) { + const auto *End = + SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr; + if (validThroughout(LScopes, MInsn, End)) { + RegVar->initializeDbgValue(MInsn); + continue; + } } + // Do not emit location lists if .debug_loc section is disabled. if (!useLocSection()) continue; @@ -1360,7 +1388,15 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, // Build the location list for this variable. SmallVector<DebugLocEntry, 8> Entries; - buildLocationList(Entries, Ranges); + bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries); + + // Check whether buildLocationList managed to merge all locations into one + // that is valid throughout the variable's scope. If so, produce a single + // value location. + if (isValidSingleLocation) { + RegVar->initializeDbgValue(Entries[0].getValues()[0]); + continue; + } // If the variable has a DIBasicType, extract it.
Basic types cannot have // unique identifiers, so don't bother resolving the type with the @@ -1370,7 +1406,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, // Finalize the entry by lowering it into a DWARF bytestream. for (auto &Entry : Entries) - Entry.finalize(*Asm, List, BT); + Entry.finalize(*Asm, List, BT, TheCU); } // For each InlinedEntity collected from DBG_LABEL instructions, convert to @@ -1489,7 +1525,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { // We have an explicit location, different from the previous location. // Don't repeat a line-0 record, but otherwise emit the new location. // (The new location might be an explicit line 0, which we do emit.) - if (PrevInstLoc && DL.getLine() == 0 && LastAsmLine == 0) + if (DL.getLine() == 0 && LastAsmLine == 0) return; unsigned Flags = 0; if (DL == PrologEndLoc) { @@ -1521,6 +1557,46 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { return DebugLoc(); } +/// Register a source line with debug info. Returns the unique label that was +/// emitted and which provides correspondence to the source line list. +static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col, + const MDNode *S, unsigned Flags, unsigned CUID, + uint16_t DwarfVersion, + ArrayRef<std::unique_ptr<DwarfCompileUnit>> DCUs) { + StringRef Fn; + unsigned FileNo = 1; + unsigned Discriminator = 0; + if (auto *Scope = cast_or_null<DIScope>(S)) { + Fn = Scope->getFilename(); + if (Line != 0 && DwarfVersion >= 4) + if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope)) + Discriminator = LBF->getDiscriminator(); + + FileNo = static_cast<DwarfCompileUnit &>(*DCUs[CUID]) + .getOrCreateSourceID(Scope->getFile()); + } + Asm.OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0, + Discriminator, Fn); +} + +DebugLoc DwarfDebug::emitInitialLocDirective(const MachineFunction &MF, + unsigned CUID) { + // Get beginning of function. + if (DebugLoc PrologEndLoc = findPrologueEndLoc(&MF)) { + // Ensure the compile unit is created if the function is called before + // beginFunction(). + (void)getOrCreateDwarfCompileUnit( + MF.getFunction().getSubprogram()->getUnit()); + // We'd like to list the prologue as "not statements" but GDB behaves + // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. + const DISubprogram *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram(); + ::recordSourceLine(*Asm, SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT, + CUID, getDwarfVersion(), getUnits()); + return PrologEndLoc; + } + return DebugLoc(); +} + // Gather pre-function debug information. Assumes being called immediately // after the function entry point has been emitted. void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { @@ -1543,13 +1619,8 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID()); // Record beginning of function. - PrologEndLoc = findPrologueEndLoc(MF); - if (PrologEndLoc) { - // We'd like to list the prologue as "not statements" but GDB behaves - // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. 
- auto *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram(); - recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT); - } + PrologEndLoc = emitInitialLocDirective( + *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID()); } void DwarfDebug::skippedNonDebugFunction() { @@ -1647,21 +1718,9 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { // emitted and which provides correspondence to the source line list. void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, unsigned Flags) { - StringRef Fn; - unsigned FileNo = 1; - unsigned Discriminator = 0; - if (auto *Scope = cast_or_null<DIScope>(S)) { - Fn = Scope->getFilename(); - if (Line != 0 && getDwarfVersion() >= 4) - if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope)) - Discriminator = LBF->getDiscriminator(); - - unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID(); - FileNo = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) - .getOrCreateSourceID(Scope->getFile()); - } - Asm->OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0, - Discriminator, Fn); + ::recordSourceLine(*Asm, Line, Col, S, Flags, + Asm->OutStreamer->getContext().getDwarfCompileUnitID(), + getDwarfVersion(), getUnits()); } //===----------------------------------------------------------------------===// @@ -1890,17 +1949,59 @@ void DwarfDebug::emitDebugStr() { } void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, - const DebugLocStream::Entry &Entry) { + const DebugLocStream::Entry &Entry, + const DwarfCompileUnit *CU) { auto &&Comments = DebugLocs.getComments(Entry); auto Comment = Comments.begin(); auto End = Comments.end(); - for (uint8_t Byte : DebugLocs.getBytes(Entry)) - Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : ""); + + // The expressions are inserted into a byte stream rather early (see + // DwarfExpression::addExpression) so for those ops (e.g. DW_OP_convert) that + // need to reference a base_type DIE the offset of that DIE is not yet known. + // To deal with this we instead insert a placeholder early and then extract + // it here and replace it with the real reference. + unsigned PtrSize = Asm->MAI->getCodePointerSize(); + DWARFDataExtractor Data(StringRef(DebugLocs.getBytes(Entry).data(), + DebugLocs.getBytes(Entry).size()), + Asm->getDataLayout().isLittleEndian(), PtrSize); + DWARFExpression Expr(Data, getDwarfVersion(), PtrSize); + + using Encoding = DWARFExpression::Operation::Encoding; + uint32_t Offset = 0; + for (auto &Op : Expr) { + assert(Op.getCode() != dwarf::DW_OP_const_type && + "3 operand ops not yet supported"); + Streamer.EmitInt8(Op.getCode(), Comment != End ? *(Comment++) : ""); + Offset++; + for (unsigned I = 0; I < 2; ++I) { + if (Op.getDescription().Op[I] == Encoding::SizeNA) + continue; + if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) { + if (CU) { + uint64_t Offset = CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset(); + assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); + Asm->EmitULEB128(Offset, nullptr, ULEB128PadSize); + } else { + // Emit a reference to the 'generic type'. + Asm->EmitULEB128(0, nullptr, ULEB128PadSize); + } + // Make sure comments stay aligned. + for (unsigned J = 0; J < ULEB128PadSize; ++J) + if (Comment != End) + Comment++; + } else { + for (uint32_t J = Offset; J < Op.getOperandEndOffset(I); ++J) + Streamer.EmitInt8(Data.getData()[J], Comment != End ? 
*(Comment++) : ""); + } + Offset = Op.getOperandEndOffset(I); + } + assert(Offset == Op.getEndOffset()); + } } -static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, - const DebugLocEntry::Value &Value, - DwarfExpression &DwarfExpr) { +void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, + const DbgValueLoc &Value, + DwarfExpression &DwarfExpr) { auto *DIExpr = Value.getExpression(); DIExpressionCursor ExprCursor(DIExpr); DwarfExpr.addFragmentOffset(DIExpr); @@ -1916,6 +2017,12 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, if (Location.isIndirect()) DwarfExpr.setMemoryLocationKind(); DIExpressionCursor Cursor(DIExpr); + + if (DIExpr->isEntryValue()) { + DwarfExpr.setEntryValueFlag(); + DwarfExpr.addEntryValueExpression(Cursor); + } + const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) return; @@ -1929,38 +2036,50 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, void DebugLocEntry::finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List, - const DIBasicType *BT) { + const DIBasicType *BT, + DwarfCompileUnit &TheCU) { + assert(!Values.empty() && + "location list entries without values are redundant"); assert(Begin != End && "unexpected location list entry with empty range"); DebugLocStream::EntryBuilder Entry(List, Begin, End); BufferByteStreamer Streamer = Entry.getStreamer(); - DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer); - const DebugLocEntry::Value &Value = Values[0]; + DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer, TheCU); + const DbgValueLoc &Value = Values[0]; if (Value.isFragment()) { // Emit all fragments that belong to the same variable and range. - assert(llvm::all_of(Values, [](DebugLocEntry::Value P) { + assert(llvm::all_of(Values, [](DbgValueLoc P) { return P.isFragment(); }) && "all values are expected to be fragments"); assert(std::is_sorted(Values.begin(), Values.end()) && "fragments are expected to be sorted"); for (auto Fragment : Values) - emitDebugLocValue(AP, BT, Fragment, DwarfExpr); + DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr); } else { assert(Values.size() == 1 && "only fragments may have >1 value"); - emitDebugLocValue(AP, BT, Value, DwarfExpr); + DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr); } DwarfExpr.finalize(); } -void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { +void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry, + const DwarfCompileUnit *CU) { // Emit the size. Asm->OutStreamer->AddComment("Loc expr size"); - Asm->emitInt16(DebugLocs.getBytes(Entry).size()); - + if (getDwarfVersion() >= 5) + Asm->EmitULEB128(DebugLocs.getBytes(Entry).size()); + else if (DebugLocs.getBytes(Entry).size() <= std::numeric_limits<uint16_t>::max()) + Asm->emitInt16(DebugLocs.getBytes(Entry).size()); + else { + // The entry is too big to fit into 16 bit, drop it as there is nothing we + // can do. + Asm->emitInt16(0); + return; + } // Emit the entry. APByteStreamer Streamer(*Asm); - emitDebugLocEntry(Streamer, Entry); + emitDebugLocEntry(Streamer, Entry, CU); } // Emit the common part of the DWARF 5 range/locations list tables header. 
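For reference, the fixed-width ULEB128 padding that the "wont fit" asserts above rely on can be spelled out as a standalone sketch. encodePaddedULEB128 is a hypothetical helper for illustration only; the patch itself uses the streamer's padded emitters (BS.EmitULEB128 / Asm->EmitULEB128 with ULEB128PadSize). With a pad of 4 bytes, at most 4 * 7 == 28 payload bits fit, which is exactly the bound both asserts check.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Encode Value as a ULEB128 padded to exactly PadTo bytes by setting the
    // continuation bit (0x80) on every byte but the last. A sketch of the
    // encoding, assuming the same padding scheme as the patch's emitters.
    static std::vector<uint8_t> encodePaddedULEB128(uint64_t Value,
                                                    unsigned PadTo) {
      assert(Value < (1ULL << (PadTo * 7)) && "value does not fit");
      std::vector<uint8_t> Bytes;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0 || Bytes.size() + 1 < PadTo)
          Byte |= 0x80; // more bytes follow
        Bytes.push_back(Byte);
      } while (Value != 0);
      while (Bytes.size() < PadTo)
        Bytes.push_back(Bytes.size() + 1 < PadTo ? 0x80 : 0x00);
      return Bytes; // e.g. 5 with PadTo == 4 -> {0x85, 0x80, 0x80, 0x00}
    }

Padding every reference to the same width is what lets the expression bytes be emitted before the base-type DIE offsets are known and patched in later without resizing the stream.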
@@ -2060,7 +2179,7 @@ void DwarfDebug::emitDebugLoc() { Asm->EmitLabelDifference(Entry.EndSym, Base, Size); } - emitDebugLocEntryLocation(Entry); + emitDebugLocEntryLocation(Entry, CU); continue; } @@ -2081,7 +2200,7 @@ void DwarfDebug::emitDebugLoc() { Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size); } - emitDebugLocEntryLocation(Entry); + emitDebugLocEntryLocation(Entry, CU); } if (IsLocLists) { @@ -2100,9 +2219,9 @@ void DwarfDebug::emitDebugLoc() { } void DwarfDebug::emitDebugLocDWO() { - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfLocDWOSection()); for (const auto &List : DebugLocs.getLists()) { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLocDWOSection()); Asm->OutStreamer->EmitLabel(List.Label); for (const auto &Entry : DebugLocs.getEntries(List)) { // GDB only supports startx_length in pre-standard split-DWARF. @@ -2117,7 +2236,7 @@ void DwarfDebug::emitDebugLocDWO() { Asm->EmitULEB128(idx); Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4); - emitDebugLocEntryLocation(Entry); + emitDebugLocEntryLocation(Entry, List.CU); } Asm->emitInt8(dwarf::DW_LLE_end_of_list); } @@ -2170,19 +2289,18 @@ void DwarfDebug::emitDebugARanges() { } // Sort the symbols by offset within the section. - std::stable_sort( - List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) { - unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0; - unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; - - // Symbols with no order assigned should be placed at the end. - // (e.g. section end labels) - if (IA == 0) - return false; - if (IB == 0) - return true; - return IA < IB; - }); + llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) { + unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; + + // Symbols with no order assigned should be placed at the end. + // (e.g. section end labels) + if (IA == 0) + return false; + if (IB == 0) + return true; + return IA < IB; + }); // Insert a final terminator. List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section))); @@ -2687,6 +2805,22 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, CU.addDIETypeSignature(RefDie, Signature); } +DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD) + : DD(DD), + TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)) { + DD->TypeUnitsUnderConstruction.clear(); + assert(TypeUnitsUnderConstruction.empty() || !DD->AddrPool.hasBeenUsed()); +} + +DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() { + DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction); + DD->AddrPool.resetUsedFlag(); +} + +DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() { + return NonTypeUnitContext(this); +} + // Add the Name along with its companion DIE to the appropriate accelerator // table (for AccelTableKind::Dwarf it's always AccelDebugNames, for // AccelTableKind::Apple, we use the table we got as an argument). If @@ -2699,7 +2833,7 @@ void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU, return; if (getAccelTableKind() != AccelTableKind::Apple && - CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None) + CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Default) return; DwarfFile &Holder = useSplitDwarf() ? 
SkeletonHolder : InfoHolder; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 8a31e989b289..3ac474e2bdda 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -1,9 +1,8 @@ //===- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework --------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -16,6 +15,7 @@ #include "AddressPool.h" #include "DebugLocStream.h" +#include "DebugLocEntry.h" #include "DwarfFile.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -52,6 +52,7 @@ class ByteStreamer; class DebugLocEntry; class DIE; class DwarfCompileUnit; +class DwarfExpression; class DwarfTypeUnit; class DwarfUnit; class LexicalScope; @@ -111,12 +112,14 @@ public: /// /// Variables can be created from \c DBG_VALUE instructions. Those whose /// location changes over time use \a DebugLocListIndex, while those with a -/// single instruction use \a MInsn and (optionally) a single entry of \a Expr. +/// single location use \a ValueLoc and (optionally) a single entry of \a Expr. /// /// Variables that have been optimized out use none of these fields. class DbgVariable : public DbgEntity { - unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs. - const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction. + /// Offset in DebugLocs. + unsigned DebugLocListIndex = ~0u; + /// Single value location description. + std::unique_ptr<DbgValueLoc> ValueLoc = nullptr; struct FrameIndexExpr { int FI; @@ -136,7 +139,7 @@ public: /// Initialize from the MMI table. void initializeMMI(const DIExpression *E, int FI) { assert(FrameIndexExprs.empty() && "Already initialized?"); - assert(!MInsn && "Already initialized?"); + assert(!ValueLoc.get() && "Already initialized?"); assert((!E || E->isValid()) && "Expected valid expression"); assert(FI != std::numeric_limits<int>::max() && "Expected valid index"); @@ -144,35 +147,35 @@ public: FrameIndexExprs.push_back({FI, E}); } - /// Initialize from a DBG_VALUE instruction. - void initializeDbgValue(const MachineInstr *DbgValue) { + // Initialize variable's location. + void initializeDbgValue(DbgValueLoc Value) { assert(FrameIndexExprs.empty() && "Already initialized?"); - assert(!MInsn && "Already initialized?"); + assert(!ValueLoc && "Already initialized?"); + assert(!Value.getExpression()->isFragment() && "Fragments not supported."); - assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable"); - assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() && - "Wrong inlined-at"); - - MInsn = DbgValue; - if (auto *E = DbgValue->getDebugExpression()) + ValueLoc = llvm::make_unique<DbgValueLoc>(Value); + if (auto *E = ValueLoc->getExpression()) if (E->getNumElements()) FrameIndexExprs.push_back({0, E}); } + /// Initialize from a DBG_VALUE instruction. + void initializeDbgValue(const MachineInstr *DbgValue); + // Accessors. 
  const DILocalVariable *getVariable() const {
    return cast<DILocalVariable>(getEntity());
  }

  const DIExpression *getSingleExpression() const {
-    assert(MInsn && FrameIndexExprs.size() <= 1);
+    assert(ValueLoc.get() && FrameIndexExprs.size() <= 1);
     return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
   }
 
   void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
   unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
   StringRef getName() const { return getVariable()->getName(); }
-  const MachineInstr *getMInsn() const { return MInsn; }
+  const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); }
 
   /// Get the FI entries, sorted by fragment offset.
   ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
   bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
@@ -205,7 +208,7 @@ public:
   }
 
   bool hasComplexAddress() const {
-    assert(MInsn && "Expected DBG_VALUE, not MMI variable");
+    assert(ValueLoc.get() && "Expected DBG_VALUE, not MMI variable");
     assert((FrameIndexExprs.empty() ||
             (FrameIndexExprs.size() == 1 &&
              FrameIndexExprs[0].Expr->getNumElements())) &&
@@ -219,11 +222,6 @@ public:
   static bool classof(const DbgEntity *N) {
     return N->getDbgEntityID() == DbgVariableKind;
   }
-
-private:
-  template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
-    return Ref.resolve();
-  }
 };
 
 //===----------------------------------------------------------------------===//
@@ -254,11 +252,6 @@ public:
   static bool classof(const DbgEntity *N) {
     return N->getDbgEntityID() == DbgLabelKind;
   }
-
-private:
-  template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
-    return Ref.resolve();
-  }
 };
 
 /// Helper used to pair up a symbol and its DWARF compile unit.
@@ -558,9 +551,11 @@ class DwarfDebug : public DebugHandlerBase {
                                  DenseSet<InlinedEntity> &ProcessedVars);
 
   /// Build the location list for all DBG_VALUEs in the
-  /// function that describe the same variable.
-  void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
-                         const DbgValueHistoryMap::InstrRanges &Ranges);
+  /// function that describe the same variable. If the resulting
+  /// list has only one entry that is valid for the entire variable's
+  /// scope, return true.
+  bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+                         const DbgValueHistoryMap::Entries &Entries);
 
   /// Collect variable information from the side table maintained by MF.
   void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
@@ -593,6 +588,9 @@ public:
   /// Emit all Dwarf sections that should come after the content.
   void endModule() override;
 
+  /// Emits the initial debug location directive.
+  DebugLoc emitInitialLocDirective(const MachineFunction &MF, unsigned CUID);
+
   /// Process beginning of an instruction.
   void beginInstruction(const MachineInstr *MI) override;
 
@@ -604,6 +602,19 @@ public:
   void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
                             DIE &Die, const DICompositeType *CTy);
 
+  friend class NonTypeUnitContext;
+  class NonTypeUnitContext {
+    DwarfDebug *DD;
+    decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
+    friend class DwarfDebug;
+    NonTypeUnitContext(DwarfDebug *DD);
+  public:
+    NonTypeUnitContext(NonTypeUnitContext&&) = default;
+    ~NonTypeUnitContext();
+  };
+
+  NonTypeUnitContext enterNonTypeUnitContext();
+
   /// Add a label so that arange data can be generated for it.
   void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
 
@@ -680,15 +691,12 @@ public:
   /// Emit an entry for the debug loc section.
This can be used to /// handle an entry that's going to be emitted into the debug loc section. void emitDebugLocEntry(ByteStreamer &Streamer, - const DebugLocStream::Entry &Entry); + const DebugLocStream::Entry &Entry, + const DwarfCompileUnit *CU); /// Emit the location for a debug loc entry, including the size header. - void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry); - - /// Find the MDNode for the given reference. - template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { - return Ref.resolve(); - } + void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry, + const DwarfCompileUnit *CU); void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP, DIE &Die); @@ -728,6 +736,10 @@ public: void addSectionLabel(const MCSymbol *Sym); const MCSymbol *getSectionLabel(const MCSection *S); + + static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, + const DbgValueLoc &Value, + DwarfExpression &DwarfExpr); }; } // end namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index b57ea8fc6322..24bbf58b91ec 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -1,9 +1,8 @@ //===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 19c350afbf17..2858afaa1cf1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -12,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "DwarfExpression.h" +#include "DwarfCompileUnit.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -40,7 +40,7 @@ void DwarfExpression::emitConstu(uint64_t Value) { void DwarfExpression::addReg(int DwarfReg, const char *Comment) { assert(DwarfReg >= 0 && "invalid negative dwarf register number"); - assert((LocationKind == Unknown || LocationKind == Register) && + assert((isUnknownLocation() || isRegisterLocation()) && "location description already locked down"); LocationKind = Register; if (DwarfReg < 32) { @@ -53,7 +53,7 @@ void DwarfExpression::addReg(int DwarfReg, const char *Comment) { void DwarfExpression::addBReg(int DwarfReg, int Offset) { assert(DwarfReg >= 0 && "invalid negative dwarf register number"); - assert(LocationKind != Register && "location description already locked down"); + assert(!isRegisterLocation() && "location description already locked down"); if (DwarfReg < 32) { emitOp(dwarf::DW_OP_breg0 + DwarfReg); } else { @@ -184,20 +184,20 @@ void DwarfExpression::addStackValue() { } void DwarfExpression::addSignedConstant(int64_t Value) { - assert(LocationKind == Implicit || LocationKind == Unknown); + assert(isImplicitLocation() || isUnknownLocation()); LocationKind = Implicit; emitOp(dwarf::DW_OP_consts); emitSigned(Value); } void DwarfExpression::addUnsignedConstant(uint64_t Value) { - assert(LocationKind == Implicit || LocationKind == Unknown); + assert(isImplicitLocation() || isUnknownLocation()); LocationKind = Implicit; emitConstu(Value); } void DwarfExpression::addUnsignedConstant(const APInt &Value) { - assert(LocationKind == Implicit || LocationKind == Unknown); + assert(isImplicitLocation() || isUnknownLocation()); LocationKind = Implicit; unsigned Size = Value.getBitWidth(); @@ -242,12 +242,16 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, } // Handle simple register locations. - if (LocationKind != Memory && !HasComplexExpression) { + if (!isMemoryLocation() && !HasComplexExpression) { for (auto &Reg : DwarfRegs) { if (Reg.DwarfRegNo >= 0) addReg(Reg.DwarfRegNo, Reg.Comment); addOpPiece(Reg.Size); } + + if (isEntryValue() && DwarfVersion >= 4) + emitOp(dwarf::DW_OP_stack_value); + DwarfRegs.clear(); return true; } @@ -296,6 +300,19 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return true; } +void DwarfExpression::addEntryValueExpression(DIExpressionCursor &ExprCursor) { + auto Op = ExprCursor.take(); + assert(Op && Op->getOp() == dwarf::DW_OP_entry_value); + assert(!isMemoryLocation() && + "We don't support entry values of memory locations yet"); + + if (DwarfVersion >= 5) + emitOp(dwarf::DW_OP_entry_value); + else + emitOp(dwarf::DW_OP_GNU_entry_value); + emitUnsigned(Op->getArg(0)); +} + /// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?". 
 static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
   while (ExprCursor) {
@@ -319,6 +336,8 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
   if (SubRegisterSizeInBits && N && (N->getOp() != dwarf::DW_OP_LLVM_fragment))
     maskSubRegister();
 
+  Optional<DIExpression::ExprOperand> PrevConvertOp = None;
+
   while (ExprCursor) {
     auto Op = ExprCursor.take();
     switch (Op->getOp()) {
@@ -341,7 +360,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
       SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
 
       // Emit a DW_OP_stack_value for implicit location descriptions.
-      if (LocationKind == Implicit)
+      if (isImplicitLocation())
         addStackValue();
 
       // Emit the DW_OP_piece.
@@ -352,7 +371,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
       return;
     }
     case dwarf::DW_OP_plus_uconst:
-      assert(LocationKind != Register);
+      assert(!isRegisterLocation());
       emitOp(dwarf::DW_OP_plus_uconst);
       emitUnsigned(Op->getArg(0));
       break;
@@ -373,8 +392,8 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
       emitOp(Op->getOp());
       break;
     case dwarf::DW_OP_deref:
-      assert(LocationKind != Register);
-      if (LocationKind != Memory && ::isMemoryLocation(ExprCursor))
+      assert(!isRegisterLocation());
+      if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor))
         // Turning this into a memory location description makes the deref
         // implicit.
         LocationKind = Memory;
@@ -382,26 +401,69 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
       emitOp(dwarf::DW_OP_deref);
       break;
     case dwarf::DW_OP_constu:
-      assert(LocationKind != Register);
+      assert(!isRegisterLocation());
       emitConstu(Op->getArg(0));
       break;
+    case dwarf::DW_OP_LLVM_convert: {
+      unsigned BitSize = Op->getArg(0);
+      dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(1));
+      if (DwarfVersion >= 5) {
+        emitOp(dwarf::DW_OP_convert);
+        // Reuse the base_type if we already have one in this CU; otherwise we
+        // create a new one.
+        unsigned I = 0, E = CU.ExprRefedBaseTypes.size();
+        for (; I != E; ++I)
+          if (CU.ExprRefedBaseTypes[I].BitSize == BitSize &&
+              CU.ExprRefedBaseTypes[I].Encoding == Encoding)
+            break;
+
+        if (I == E)
+          CU.ExprRefedBaseTypes.emplace_back(BitSize, Encoding);
+
+        // If targeting a location list, simply emit the index into the raw
+        // byte stream as ULEB128; DwarfDebug::emitDebugLocEntry has been
+        // fitted with means to extract it later.
+        // If targeting an inlined DW_AT_location, insert a DIEBaseTypeRef
+        // (containing the index and a resolve mechanism during emit) into the
+        // DIE value list.
+        emitBaseTypeRef(I);
+      } else {
+        if (PrevConvertOp && PrevConvertOp->getArg(0) < BitSize) {
+          if (Encoding == dwarf::DW_ATE_signed)
+            emitLegacySExt(PrevConvertOp->getArg(0));
+          else if (Encoding == dwarf::DW_ATE_unsigned)
+            emitLegacyZExt(PrevConvertOp->getArg(0));
+          PrevConvertOp = None;
+        } else {
+          PrevConvertOp = Op;
+        }
+      }
+      break;
+    }
     case dwarf::DW_OP_stack_value:
       LocationKind = Implicit;
       break;
     case dwarf::DW_OP_swap:
-      assert(LocationKind != Register);
+      assert(!isRegisterLocation());
       emitOp(dwarf::DW_OP_swap);
       break;
     case dwarf::DW_OP_xderef:
-      assert(LocationKind != Register);
+      assert(!isRegisterLocation());
       emitOp(dwarf::DW_OP_xderef);
       break;
+    case dwarf::DW_OP_deref_size:
+      emitOp(dwarf::DW_OP_deref_size);
+      emitData1(Op->getArg(0));
+      break;
+    case dwarf::DW_OP_LLVM_tag_offset:
+      TagOffset = Op->getArg(0);
+      break;
     default:
       llvm_unreachable("unhandled opcode found in expression");
     }
   }
 
-  if (LocationKind == Implicit)
+  if (isImplicitLocation())
     // Turn this into an implicit location description.
     addStackValue();
 }
@@ -437,3 +499,25 @@ void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
     addOpPiece(FragmentOffset - OffsetInBits);
   OffsetInBits = FragmentOffset;
 }
+
+void DwarfExpression::emitLegacySExt(unsigned FromBits) {
+  // (((X >> (FromBits - 1)) * (~0)) << FromBits) | X
+  emitOp(dwarf::DW_OP_dup);
+  emitOp(dwarf::DW_OP_constu);
+  emitUnsigned(FromBits - 1);
+  emitOp(dwarf::DW_OP_shr);
+  emitOp(dwarf::DW_OP_lit0);
+  emitOp(dwarf::DW_OP_not);
+  emitOp(dwarf::DW_OP_mul);
+  emitOp(dwarf::DW_OP_constu);
+  emitUnsigned(FromBits);
+  emitOp(dwarf::DW_OP_shl);
+  emitOp(dwarf::DW_OP_or);
+}
+
+void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
+  // X & ((1 << FromBits) - 1)
+  emitOp(dwarf::DW_OP_constu);
+  emitUnsigned((1ULL << FromBits) - 1);
+  emitOp(dwarf::DW_OP_and);
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 91568ba6d107..ec2ef6e575f7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ------*- C++ -*-===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,7 +27,7 @@ namespace llvm {
 class AsmPrinter;
 class APInt;
 class ByteStreamer;
-class DwarfUnit;
+class DwarfCompileUnit;
 class DIELoc;
 class TargetRegisterInfo;
 
@@ -105,23 +104,56 @@ protected:
     const char *Comment;
   };
 
+  DwarfCompileUnit &CU;
+
   /// The register location, if any.
   SmallVector<Register, 2> DwarfRegs;
 
   /// Current Fragment Offset in Bits.
   uint64_t OffsetInBits = 0;
-  unsigned DwarfVersion;
 
   /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
-  unsigned SubRegisterSizeInBits = 0;
-  unsigned SubRegisterOffsetInBits = 0;
+  unsigned SubRegisterSizeInBits : 16;
+  unsigned SubRegisterOffsetInBits : 16;
 
   /// The kind of location description being produced.
-  enum { Unknown = 0, Register, Memory, Implicit } LocationKind = Unknown;
+  enum { Unknown = 0, Register, Memory, Implicit };
+
+  /// The flags of the location description being produced.
+ enum { EntryValue = 1 }; + + unsigned LocationKind : 3; + unsigned LocationFlags : 2; + unsigned DwarfVersion : 4; + +public: + bool isUnknownLocation() const { + return LocationKind == Unknown; + } + + bool isMemoryLocation() const { + return LocationKind == Memory; + } + + bool isRegisterLocation() const { + return LocationKind == Register; + } + + bool isImplicitLocation() const { + return LocationKind == Implicit; + } + + bool isEntryValue() const { + return LocationFlags & EntryValue; + } + Optional<uint8_t> TagOffset; + +protected: /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed /// to represent a subregister. void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) { + assert(SizeInBits < 65536 && OffsetInBits < 65536); SubRegisterSizeInBits = SizeInBits; SubRegisterOffsetInBits = OffsetInBits; } @@ -138,6 +170,10 @@ protected: /// Emit a raw unsigned value. virtual void emitUnsigned(uint64_t Value) = 0; + virtual void emitData1(uint8_t Value) = 0; + + virtual void emitBaseTypeRef(uint64_t Idx) = 0; + /// Emit a normalized unsigned constant. void emitConstu(uint64_t Value); @@ -200,7 +236,10 @@ protected: ~DwarfExpression() = default; public: - DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {} + DwarfExpression(unsigned DwarfVersion, DwarfCompileUnit &CU) + : CU(CU), SubRegisterSizeInBits(0), SubRegisterOffsetInBits(0), + LocationKind(Unknown), LocationFlags(Unknown), + DwarfVersion(DwarfVersion) {} /// This needs to be called last to commit any pending changes. void finalize(); @@ -214,15 +253,17 @@ public: /// Emit an unsigned constant. void addUnsignedConstant(const APInt &Value); - bool isMemoryLocation() const { return LocationKind == Memory; } - bool isUnknownLocation() const { return LocationKind == Unknown; } - /// Lock this down to become a memory location description. void setMemoryLocationKind() { - assert(LocationKind == Unknown); + assert(isUnknownLocation()); LocationKind = Memory; } + /// Lock this down to become an entry value location. + void setEntryValueFlag() { + LocationFlags |= EntryValue; + } + /// Emit a machine register location. As an optimization this may also consume /// the prefix of a DwarfExpression if a more efficient representation for /// combining the register location and the first operation exists. @@ -237,6 +278,9 @@ public: DIExpressionCursor &Expr, unsigned MachineReg, unsigned FragmentOffsetInBits = 0); + /// Emit entry value dwarf operation. + void addEntryValueExpression(DIExpressionCursor &ExprCursor); + /// Emit all remaining operations in the DIExpressionCursor. /// /// \param FragmentOffsetInBits If this is one fragment out of multiple @@ -248,6 +292,9 @@ public: /// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to /// the fragment described by \c Expr. void addFragmentOffset(const DIExpression *Expr); + + void emitLegacySExt(unsigned FromBits); + void emitLegacyZExt(unsigned FromBits); }; /// DwarfExpression implementation for .debug_loc entries. 
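emitLegacySExt and emitLegacyZExt, declared above and defined in the DwarfExpression.cpp hunk, emulate DW_OP_convert for pre-DWARF 5 consumers using only ordinary stack arithmetic. A host-side re-computation of the two formulas can make it easier to convince yourself the opcode sequences are right; this is plain C++, not DWARF evaluation, it assumes 0 < FromBits < 64 with the bits above FromBits already zero, and legacySExt/legacyZExt are illustrative names.

    #include <cassert>
    #include <cstdint>

    // DW_OP_dup; constu FromBits-1; shr; lit0; not; mul; constu FromBits; shl; or
    static uint64_t legacySExt(uint64_t X, unsigned FromBits) {
      uint64_t SignBit = X >> (FromBits - 1); // 0 or 1
      uint64_t Mask = SignBit * ~UINT64_C(0); // 0 or all-ones
      return (Mask << FromBits) | X;          // smear the sign into the high bits
    }

    // DW_OP_constu ((1 << FromBits) - 1); DW_OP_and
    static uint64_t legacyZExt(uint64_t X, unsigned FromBits) {
      return X & ((UINT64_C(1) << FromBits) - 1);
    }

    int main() {
      assert(legacySExt(0xff, 8) == ~UINT64_C(0)); // int8_t -1 -> int64_t -1
      assert(legacySExt(0x7f, 8) == 0x7f);         // positive values unchanged
      assert(legacyZExt(0x1ff, 8) == 0xff);        // drop bits above FromBits
      return 0;
    }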
@@ -257,27 +304,30 @@ class DebugLocDwarfExpression final : public DwarfExpression { void emitOp(uint8_t Op, const char *Comment = nullptr) override; void emitSigned(int64_t Value) override; void emitUnsigned(uint64_t Value) override; + void emitData1(uint8_t Value) override; + void emitBaseTypeRef(uint64_t Idx) override; bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; public: - DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS) - : DwarfExpression(DwarfVersion), BS(BS) {} + DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS, DwarfCompileUnit &CU) + : DwarfExpression(DwarfVersion, CU), BS(BS) {} }; /// DwarfExpression implementation for singular DW_AT_location. class DIEDwarfExpression final : public DwarfExpression { const AsmPrinter &AP; - DwarfUnit &DU; DIELoc &DIE; void emitOp(uint8_t Op, const char *Comment = nullptr) override; void emitSigned(int64_t Value) override; void emitUnsigned(uint64_t Value) override; + void emitData1(uint8_t Value) override; + void emitBaseTypeRef(uint64_t Idx) override; bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; public: - DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE); + DIEDwarfExpression(const AsmPrinter &AP, DwarfCompileUnit &CU, DIELoc &DIE); DIELoc *finalize() { DwarfExpression::finalize(); diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 78ccad481411..e3c9095d1343 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -44,6 +43,11 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) { if (!S) return; + // Skip CUs that ended up not being needed (split CUs that were abandoned + // because they added no information beyond the non-split CU) + if (llvm::empty(TheU->getUnitDie().values())) + return; + Asm->OutStreamer->SwitchSection(S); TheU->emitHeader(UseOffsets); Asm->emitDwarfDIE(TheU->getUnitDie()); @@ -63,6 +67,11 @@ void DwarfFile::computeSizeAndOffsets() { if (TheU->getCUNode()->isDebugDirectivesOnly()) continue; + // Skip CUs that ended up not being needed (split CUs that were abandoned + // because they added no information beyond the non-split CU) + if (llvm::empty(TheU->getUnitDie().values())) + return; + TheU->setDebugSectionOffset(SecOffset); SecOffset += computeSizeAndOffsetsForUnit(TheU.get()); } diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index 51acca8c1e53..244678ce9dc1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -1,9 +1,8 @@ //===- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework ---------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -59,7 +58,6 @@ public: MCSymbol *getSym() const { return RangeSym; } const DwarfCompileUnit &getCU() const { return *CU; } const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; } - void addRange(RangeSpan Range) { Ranges.push_back(Range); } }; class DwarfFile { @@ -148,7 +146,7 @@ public: void emitUnits(bool UseOffsets); /// Emit the given unit to its section. - void emitUnit(DwarfUnit *U, bool UseOffsets); + void emitUnit(DwarfUnit *TheU, bool UseOffsets); /// Emit a set of abbreviations to the specific section. void emitAbbrevs(MCSection *); diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index 02016534a774..2a76dcb1b082 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h index f484540d8d37..c5f5637fdae3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -1,9 +1,8 @@ //===- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 80b365f1aa43..991ab94b50ab 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -47,21 +46,30 @@ using namespace llvm; #define DEBUG_TYPE "dwarfdebug" -DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, +DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, + DwarfCompileUnit &CU, DIELoc &DIE) - : DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU), + : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), DIE(DIE) {} void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) { - DU.addUInt(DIE, dwarf::DW_FORM_data1, Op); + CU.addUInt(DIE, dwarf::DW_FORM_data1, Op); } void DIEDwarfExpression::emitSigned(int64_t Value) { - DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); + CU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); } void DIEDwarfExpression::emitUnsigned(uint64_t Value) { - DU.addUInt(DIE, dwarf::DW_FORM_udata, Value); + CU.addUInt(DIE, dwarf::DW_FORM_udata, Value); +} + +void DIEDwarfExpression::emitData1(uint8_t Value) { + CU.addUInt(DIE, dwarf::DW_FORM_data1, Value); +} + +void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) { + CU.addBaseTypeRef(DIE, Idx); } bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, @@ -285,21 +293,21 @@ void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute, addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); } -MD5::MD5Result *DwarfUnit::getMD5AsBytes(const DIFile *File) const { +Optional<MD5::MD5Result> DwarfUnit::getMD5AsBytes(const DIFile *File) const { assert(File); if (DD->getDwarfVersion() < 5) - return nullptr; + return None; Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum(); if (!Checksum || Checksum->Kind != DIFile::CSK_MD5) - return nullptr; + return None; // Convert the string checksum to an MD5Result for the streamer. // The verifier validates the checksum so we assume it's okay. // An MD5 checksum is 16 bytes. 
std::string ChecksumString = fromHex(Checksum->Value); - void *CKMem = Asm->OutStreamer->getContext().allocate(16, 1); - memcpy(CKMem, ChecksumString.data(), 16); - return reinterpret_cast<MD5::MD5Result *>(CKMem); + MD5::MD5Result CKMem; + std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data()); + return CKMem; } unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) { @@ -311,7 +319,9 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) { addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0); } return SplitLineTable->getFile(File->getDirectory(), File->getFilename(), - getMD5AsBytes(File), File->getSource()); + getMD5AsBytes(File), + Asm->OutContext.getDwarfVersion(), + File->getSource()); } void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { @@ -393,7 +403,6 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) { return; unsigned FileID = getOrCreateSourceID(File); - assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } @@ -462,9 +471,8 @@ static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) { assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type); - DITypeRef Deriv = DTy->getBaseType(); - assert(Deriv && "Expected valid base type"); - return isUnsignedDIType(DD, DD->resolve(Deriv)); + assert(DTy->getBaseType() && "Expected valid base type"); + return isUnsignedDIType(DD, DTy->getBaseType()); } auto *BTy = cast<DIBasicType>(Ty); @@ -523,6 +531,10 @@ void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO, addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm()); } +void DwarfUnit::addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty) { + addConstantValue(Die, isUnsignedDIType(DD, Ty), Val); +} + void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) { // FIXME: This is a bit conservative/simple - it emits negative values always // sign extended to 64 bits rather than minimizing the number of bytes. @@ -603,8 +615,8 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) { return getDIE(Context); } -DIE *DwarfTypeUnit::createTypeDIE(const DICompositeType *Ty) { - auto *Context = resolve(Ty->getScope()); +DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) { + auto *Context = Ty->getScope(); DIE *ContextDIE = getOrCreateContextDIE(Context); if (DIE *TyDIE = getDIE(Ty)) @@ -619,6 +631,37 @@ DIE *DwarfTypeUnit::createTypeDIE(const DICompositeType *Ty) { return &TyDIE; } +DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE, + const DIType *Ty) { + // Create new type. + DIE &TyDIE = createAndAddDIE(Ty->getTag(), ContextDIE, Ty); + + updateAcceleratorTables(Context, Ty, TyDIE); + + if (auto *BT = dyn_cast<DIBasicType>(Ty)) + constructTypeDIE(TyDIE, BT); + else if (auto *STy = dyn_cast<DISubroutineType>(Ty)) + constructTypeDIE(TyDIE, STy); + else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { + if (DD->generateTypeUnits() && !Ty->isForwardDecl() && + (Ty->getRawName() || CTy->getRawIdentifier())) { + // Skip updating the accelerator tables since this is not the full type. 
+ if (MDString *TypeId = CTy->getRawIdentifier()) + DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); + else { + auto X = DD->enterNonTypeUnitContext(); + finishNonUnitTypeDIE(TyDIE, CTy); + } + return &TyDIE; + } + constructTypeDIE(TyDIE, CTy); + } else { + constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty)); + } + + return &TyDIE; +} + DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { if (!TyNode) return nullptr; @@ -627,43 +670,23 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { // DW_TAG_restrict_type is not supported in DWARF2 if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) - return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType())); + return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType()); // DW_TAG_atomic_type is not supported in DWARF < 5 if (Ty->getTag() == dwarf::DW_TAG_atomic_type && DD->getDwarfVersion() < 5) - return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType())); + return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType()); // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - auto *Context = resolve(Ty->getScope()); + auto *Context = Ty->getScope(); DIE *ContextDIE = getOrCreateContextDIE(Context); assert(ContextDIE); if (DIE *TyDIE = getDIE(Ty)) return TyDIE; - // Create new type. - DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty); - - updateAcceleratorTables(Context, Ty, TyDIE); - - if (auto *BT = dyn_cast<DIBasicType>(Ty)) - constructTypeDIE(TyDIE, BT); - else if (auto *STy = dyn_cast<DISubroutineType>(Ty)) - constructTypeDIE(TyDIE, STy); - else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { - if (DD->generateTypeUnits() && !Ty->isForwardDecl()) - if (MDString *TypeId = CTy->getRawIdentifier()) { - DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); - // Skip updating the accelerator tables since this is not the full type. - return &TyDIE; - } - constructTypeDIE(TyDIE, CTy); - } else { - constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty)); - } - - return &TyDIE; + return static_cast<DwarfUnit *>(ContextDIE->getUnit()) + ->createTypeDIE(Context, *ContextDIE, Ty); } void DwarfUnit::updateAcceleratorTables(const DIScope *Context, @@ -679,7 +702,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context, DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags); if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) || - isa<DINamespace>(Context)) + isa<DINamespace>(Context) || isa<DICommonBlock>(Context)) addGlobalType(Ty, TyDIE, Context); } } @@ -702,8 +725,8 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const { SmallVector<const DIScope *, 1> Parents; while (!isa<DICompileUnit>(Context)) { Parents.push_back(Context); - if (Context->getScope()) - Context = resolve(Context->getScope()); + if (const DIScope *S = Context->getScope()) + Context = S; else // Structure, etc types will have a NULL context if they're at the top // level. @@ -754,7 +777,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { uint16_t Tag = Buffer.getTag(); // Map to main type, void will not have a type. 
- const DIType *FromTy = resolve(DTy->getBaseType()); + const DIType *FromTy = DTy->getBaseType(); if (FromTy) addType(Buffer, FromTy); @@ -770,24 +793,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) - addDIEEntry( - Buffer, dwarf::DW_AT_containing_type, - *getOrCreateTypeDIE(resolve(cast<DIDerivedType>(DTy)->getClassType()))); + addDIEEntry(Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(cast<DIDerivedType>(DTy)->getClassType())); // Add source line info if available and TyDesc is not a forward declaration. if (!DTy->isForwardDecl()) addSourceLine(Buffer, DTy); - // If DWARF address space value is other than None, add it for pointer and - // reference types as DW_AT_address_class. - if (DTy->getDWARFAddressSpace() && (Tag == dwarf::DW_TAG_pointer_type || - Tag == dwarf::DW_TAG_reference_type)) + // If DWARF address space value is other than None, add it. The IR + // verifier checks that DWARF address space only exists for pointer + // or reference types. + if (DTy->getDWARFAddressSpace()) addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4, DTy->getDWARFAddressSpace().getValue()); } void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) { for (unsigned i = 1, N = Args.size(); i < N; ++i) { - const DIType *Ty = resolve(Args[i]); + const DIType *Ty = Args[i]; if (!Ty) { assert(i == N-1 && "Unspecified parameter must be the last argument"); createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); @@ -804,7 +826,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) { // Add return type. A void return won't have a type. auto Elements = cast<DISubroutineType>(CTy)->getTypeArray(); if (Elements.size()) - if (auto RTy = resolve(Elements[0])) + if (auto RTy = Elements[0]) addType(Buffer, RTy); bool isPrototyped = true; @@ -875,7 +897,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) { if (DDTy->getTag() == dwarf::DW_TAG_friend) { DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); - addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend); + addType(ElemDie, DDTy->getBaseType(), dwarf::DW_AT_friend); } else if (DDTy->isStaticMember()) { getOrCreateStaticMemberDIE(DDTy); } else if (Tag == dwarf::DW_TAG_variant_part) { @@ -884,7 +906,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer); if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) { - if (isUnsignedDIType(DD, resolve(Discriminator->getBaseType()))) + if (isUnsignedDIType(DD, Discriminator->getBaseType())) addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue()); else addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue()); @@ -898,7 +920,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { StringRef PropertyName = Property->getName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); if (Property->getType()) - addType(ElemDie, resolve(Property->getType())); + addType(ElemDie, Property->getType()); addSourceLine(ElemDie, Property); StringRef GetterName = Property->getGetterName(); if (!GetterName.empty()) @@ -924,7 +946,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { // inside C++ composite types to 
point to the base class with the vtable. // Rust uses DW_AT_containing_type to link a vtable to the type // for which it was created. - if (auto *ContainingType = resolve(CTy->getVTableHolder())) + if (auto *ContainingType = CTy->getVTableHolder()) addDIEEntry(Buffer, dwarf::DW_AT_containing_type, *getOrCreateTypeDIE(ContainingType)); @@ -994,7 +1016,7 @@ void DwarfUnit::constructTemplateTypeParameterDIE( createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); // Add the type if it exists, it could be void and therefore no type. if (TP->getType()) - addType(ParamDIE, resolve(TP->getType())); + addType(ParamDIE, TP->getType()); if (!TP->getName().empty()) addString(ParamDIE, dwarf::DW_AT_name, TP->getName()); } @@ -1006,12 +1028,12 @@ void DwarfUnit::constructTemplateValueParameterDIE( // Add the type if there is one, template template and template parameter // packs will not have a type. if (VP->getTag() == dwarf::DW_TAG_template_value_parameter) - addType(ParamDIE, resolve(VP->getType())); + addType(ParamDIE, VP->getType()); if (!VP->getName().empty()) addString(ParamDIE, dwarf::DW_AT_name, VP->getName()); if (Metadata *Val = VP->getValue()) { if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val)) - addConstantValue(ParamDIE, CI, resolve(VP->getType())); + addConstantValue(ParamDIE, CI, VP->getType()); else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) { // We cannot describe the location of dllimport'd entities: the // computation of their address requires loads from the IAT. @@ -1085,7 +1107,7 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) { // such construction creates the DIE (as is the case for member function // declarations). DIE *ContextDIE = - Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP->getScope())); + Minimal ? &getUnitDie() : getOrCreateContextDIE(SP->getScope()); if (DIE *SPDie = getDIE(SP)) return SPDie; @@ -1107,7 +1129,8 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) { if (SP->isDefinition()) return &SPDie; - applySubprogramAttributes(SP, SPDie); + static_cast<DwarfUnit *>(SPDie.getUnit()) + ->applySubprogramAttributes(SP, SPDie); return &SPDie; } @@ -1197,7 +1220,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, // Add a return type. If this is a type like a C/C++ void type we don't add a // return type. 
if (Args.size()) - if (auto Ty = resolve(Args[0])) + if (auto Ty = Args[0]) addType(SPDie, Ty); unsigned VK = SP->getVirtuality(); @@ -1209,8 +1232,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex()); addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); } - ContainingTypeMap.insert( - std::make_pair(&SPDie, resolve(SP->getContainingType()))); + ContainingTypeMap.insert(std::make_pair(&SPDie, SP->getContainingType())); } if (!SP->isDefinition()) { @@ -1261,6 +1283,12 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, if (SP->isMainSubprogram()) addFlag(SPDie, dwarf::DW_AT_main_subprogram); + if (SP->isPure()) + addFlag(SPDie, dwarf::DW_AT_pure); + if (SP->isElemental()) + addFlag(SPDie, dwarf::DW_AT_elemental); + if (SP->isRecursive()) + addFlag(SPDie, dwarf::DW_AT_recursive); } void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, @@ -1310,7 +1338,7 @@ static bool hasVectorBeenPadded(const DICompositeType *CTy) { const uint64_t ActualSize = CTy->getSizeInBits(); // Obtain the size of each element in the vector. - DIType *BaseTy = CTy->getBaseType().resolve(); + DIType *BaseTy = CTy->getBaseType(); assert(BaseTy && "Unknown vector element type."); const uint64_t ElementSize = BaseTy->getSizeInBits(); @@ -1338,7 +1366,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { } // Emit the element type. - addType(Buffer, resolve(CTy->getBaseType())); + addType(Buffer, CTy->getBaseType()); // Get an anonymous type for index type. // FIXME: This type should be passed down from the front end @@ -1356,7 +1384,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { } void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { - const DIType *DTy = resolve(CTy->getBaseType()); + const DIType *DTy = CTy->getBaseType(); bool IsUnsigned = DTy && isUnsignedDIType(DD, DTy); if (DTy) { if (DD->getDwarfVersion() >= 3) @@ -1365,6 +1393,9 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { addFlag(Buffer, dwarf::DW_AT_enum_class); } + auto *Context = CTy->getScope(); + bool IndexEnumerators = !Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) || + isa<DINamespace>(Context) || isa<DICommonBlock>(Context); DINodeArray Elements = CTy->getElements(); // Add enumerators to enumeration type. @@ -1376,6 +1407,8 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { addString(Enumerator, dwarf::DW_AT_name, Name); auto Value = static_cast<uint64_t>(Enum->getValue()); addConstantValue(Enumerator, IsUnsigned, Value); + if (IndexEnumerators) + addGlobalName(Name, Enumerator, Context); } } } @@ -1400,7 +1433,7 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); - if (DIType *Resolved = resolve(DT->getBaseType())) + if (DIType *Resolved = DT->getBaseType()) addType(MemberDie, Resolved); addSourceLine(MemberDie, DT); @@ -1509,7 +1542,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. 
- DIE *ContextDIE = getOrCreateContextDIE(resolve(DT->getScope())); + DIE *ContextDIE = getOrCreateContextDIE(DT->getScope()); assert(dwarf::isType(ContextDIE->getTag()) && "Static member should belong to a type."); @@ -1518,7 +1551,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT); - const DIType *Ty = resolve(DT->getBaseType()); + const DIType *Ty = DT->getBaseType(); addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName()); addType(StaticMemberDIE, Ty); @@ -1671,3 +1704,11 @@ void DwarfUnit::addLoclistsBase() { DU->getLoclistsTableBaseSym(), TLOF.getDwarfLoclistsSection()->getBeginSymbol()); } + +void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { + addFlag(D, dwarf::DW_AT_declaration); + StringRef Name = CTy->getName(); + if (!Name.empty()) + addString(D, dwarf::DW_AT_name, Name); + getCU().createTypeDIE(CTy); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index a59ebb7c1465..56c934a35ae8 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/DwarfUnit.h - Dwarf Compile Unit ---*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -199,6 +198,7 @@ public: void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty); void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty); void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned); + void addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty); void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val); /// Add constant value entry in variable DIE. @@ -237,6 +237,9 @@ public: void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, bool SkipSPAttributes = false); + /// Creates type DIE with specific context. + DIE *createTypeDIE(const DIScope *Context, DIE &ContextDIE, const DIType *Ty); + /// Find existing DIE or create new DIE for the given type. DIE *getOrCreateTypeDIE(const MDNode *TyNode); @@ -294,7 +297,10 @@ public: /// If the \p File has an MD5 checksum, return it as an MD5Result /// allocated in the MCContext. - MD5::MD5Result *getMD5AsBytes(const DIFile *File) const; + Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const; + + /// Get context owner's DIE. + DIE *createTypeDIE(const DICompositeType *Ty); protected: ~DwarfUnit(); @@ -306,17 +312,6 @@ protected: /// create a new ID and insert it in the line table. virtual unsigned getOrCreateSourceID(const DIFile *File) = 0; - /// Look in the DwarfDebug map for the MDNode that corresponds to the - /// reference. - template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { - return Ref.resolve(); - } - - /// If this is a named finished type then include it in the list of types for - /// the accelerator tables. - void updateAcceleratorTables(const DIScope *Context, const DIType *Ty, - const DIE &TyDIE); - /// Emit the common part of the header for this unit. 
void emitCommonHeader(bool UseOffsets, dwarf::UnitType UT); @@ -344,6 +339,13 @@ private: /// Set D as anonymous type for index which can be reused later. void setIndexTyDie(DIE *D) { IndexTyDie = D; } + virtual void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) = 0; + + /// If this is a named finished type then include it in the list of types for + /// the accelerator tables. + void updateAcceleratorTables(const DIScope *Context, const DIType *Ty, + const DIE &TyDIE); + virtual bool isDwoUnit() const = 0; const MCSymbol *getCrossSectionRelativeBaseAddress() const override; }; @@ -356,6 +358,7 @@ class DwarfTypeUnit final : public DwarfUnit { bool UsedLineTable = false; unsigned getOrCreateSourceID(const DIFile *File) override; + void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override; bool isDwoUnit() const override; public: @@ -365,9 +368,6 @@ public: void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; } void setType(const DIE *Ty) { this->Ty = Ty; } - /// Get context owner's DIE. - DIE *createTypeDIE(const DICompositeType *Ty); - /// Emit the header for this unit, not including the initial length field. void emitHeader(bool UseOffsets) override; unsigned getHeaderSize() const override { diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 7599121de2b0..99e3687b36b8 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -1,9 +1,8 @@ //===- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -379,7 +378,8 @@ MCSymbol *EHStreamer::emitExceptionTable() { bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm; unsigned CallSiteEncoding = - IsSJLJ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128; + IsSJLJ ? static_cast<unsigned>(dwarf::DW_EH_PE_udata4) : + Asm->getObjFileLowering().getCallSiteEncoding(); bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty(); // Type infos. @@ -524,24 +524,24 @@ MCSymbol *EHStreamer::emitExceptionTable() { // Offset of the call site relative to the start of the procedure. if (VerboseAsm) Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<"); - Asm->EmitLabelDifferenceAsULEB128(BeginLabel, EHFuncBeginSym); + Asm->EmitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding); if (VerboseAsm) Asm->OutStreamer->AddComment(Twine(" Call between ") + BeginLabel->getName() + " and " + EndLabel->getName()); - Asm->EmitLabelDifferenceAsULEB128(EndLabel, BeginLabel); + Asm->EmitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding); // Offset of the landing pad relative to the start of the procedure. 
if (!S.LPad) { if (VerboseAsm) Asm->OutStreamer->AddComment(" has no landing pad"); - Asm->EmitULEB128(0); + Asm->EmitCallSiteValue(0, CallSiteEncoding); } else { if (VerboseAsm) Asm->OutStreamer->AddComment(Twine(" jumps to ") + S.LPad->LandingPadLabel->getName()); - Asm->EmitLabelDifferenceAsULEB128(S.LPad->LandingPadLabel, - EHFuncBeginSym); + Asm->EmitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym, + CallSiteEncoding); } // Offset of the first associated action record, relative to the start of diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index ce912d032c6d..e62cf17a05d4 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -1,9 +1,8 @@ //===- EHStreamer.h - Exception Handling Directive Streamer -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 34677ecc9e69..39392b79e960 100644 --- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -1,9 +1,8 @@ //===- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 3479a00def23..3145cc90dc73 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -1,9 +1,8 @@ //===- OcamlGCPrinter.cpp - Ocaml frametable emitter ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AsmPrinter/WasmException.cpp b/lib/CodeGen/AsmPrinter/WasmException.cpp index 527e5ae50146..444b0ed17b6d 100644 --- a/lib/CodeGen/AsmPrinter/WasmException.cpp +++ b/lib/CodeGen/AsmPrinter/WasmException.cpp @@ -1,9 +1,8 @@ //===-- CodeGen/AsmPrinter/WasmException.cpp - Wasm Exception Impl --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -19,10 +18,10 @@ using namespace llvm; void WasmException::endModule() { - // This is the symbol used in 'throw' and 'if_except' instruction to denote + // This is the symbol used in 'throw' and 'br_on_exn' instruction to denote // this is a C++ exception. This symbol has to be emitted somewhere once in // the module. Check if the symbol has already been created, i.e., we have at - // least one 'throw' or 'if_except' instruction in the module, and emit the + // least one 'throw' or 'br_on_exn' instruction in the module, and emit the // symbol only if so. SmallString<60> NameStr; Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout()); diff --git a/lib/CodeGen/AsmPrinter/WasmException.h b/lib/CodeGen/AsmPrinter/WasmException.h index cbdb42457cf8..1893b6b2df43 100644 --- a/lib/CodeGen/AsmPrinter/WasmException.h +++ b/lib/CodeGen/AsmPrinter/WasmException.h @@ -1,9 +1,8 @@ //===-- WasmException.h - Wasm Exception Framework -------------*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index 18d37caf57ee..290be81c6baa 100644 --- a/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -1,9 +1,8 @@ //===-- CodeGen/AsmPrinter/WinCFGuard.cpp - Control Flow Guard Impl ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AsmPrinter/WinCFGuard.h b/lib/CodeGen/AsmPrinter/WinCFGuard.h index 28f119e35966..def0a59ab007 100644 --- a/lib/CodeGen/AsmPrinter/WinCFGuard.h +++ b/lib/CodeGen/AsmPrinter/WinCFGuard.h @@ -1,9 +1,8 @@ //===-- WinCFGuard.h - Windows Control Flow Guard Handling ----*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index cf8e8c69bc2a..155e91ce61a1 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -1,9 +1,8 @@ //===-- CodeGen/AsmPrinter/WinException.cpp - Dwarf Exception Impl ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -110,6 +109,12 @@ void WinException::beginFunction(const MachineFunction *MF) { beginFunclet(MF->front(), Asm->CurrentFnSym); } +void WinException::markFunctionEnd() { + if (isAArch64 && CurrentFuncletEntry && + (shouldEmitMoves || shouldEmitPersonality)) + Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); +} + /// endFunction - Gather and emit post-function exception information. /// void WinException::endFunction(const MachineFunction *MF) { @@ -129,7 +134,7 @@ void WinException::endFunction(const MachineFunction *MF) { NonConstMF->tidyLandingPads(); } - endFunclet(); + endFuncletImpl(); // endFunclet will emit the necessary .xdata tables for x64 SEH. if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets()) @@ -232,6 +237,15 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, } void WinException::endFunclet() { + if (isAArch64 && CurrentFuncletEntry && + (shouldEmitMoves || shouldEmitPersonality)) { + Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); + Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); + } + endFuncletImpl(); +} + +void WinException::endFuncletImpl() { // No funclet to process? Great, we have nothing to do. if (!CurrentFuncletEntry) return; @@ -247,8 +261,6 @@ void WinException::endFunclet() { // to EmitWinEHHandlerData below can calculate the size of the funclet or // function. if (isAArch64) { - Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); - Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection( Asm->OutStreamer->getCurrentSectionOnly()); Asm->OutStreamer->SwitchSection(XData); @@ -545,15 +557,17 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { OS.AddComment(Comment); }; - // Emit a label assignment with the SEH frame offset so we can use it for - // llvm.eh.recoverfp. - StringRef FLinkageName = - GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); - MCSymbol *ParentFrameOffset = - Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); - const MCExpr *MCOffset = - MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx); - Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); + if (!isAArch64) { + // Emit a label assignment with the SEH frame offset so we can use it for + // llvm.eh.recoverfp. 
+ StringRef FLinkageName = + GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); + MCSymbol *ParentFrameOffset = + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + const MCExpr *MCOffset = + MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); + } // Use the assembler to compute the number of table entries through label // difference and division. @@ -936,8 +950,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, int FI = FuncInfo.EHRegNodeFrameIndex; if (FI != INT_MAX) { const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); - unsigned UnusedReg; - Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg); + Offset = TFI->getNonLocalFrameIndexReference(*Asm->MF, FI); } MCContext &Ctx = Asm->OutContext; diff --git a/lib/CodeGen/AsmPrinter/WinException.h b/lib/CodeGen/AsmPrinter/WinException.h index 37c796f89765..dc5036302131 100644 --- a/lib/CodeGen/AsmPrinter/WinException.h +++ b/lib/CodeGen/AsmPrinter/WinException.h @@ -1,9 +1,8 @@ //===-- WinException.h - Windows Exception Handling ----------*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -86,6 +85,7 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// only), it is relative to the frame pointer. int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo); + void endFuncletImpl(); public: //===--------------------------------------------------------------------===// // Main entry points. @@ -100,6 +100,8 @@ public: /// immediately after the function entry point. void beginFunction(const MachineFunction *MF) override; + void markFunctionEnd() override; + /// Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index 95581c09dd1c..dc7eaf6a5fe7 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -1,9 +1,8 @@ //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -361,7 +360,7 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) { /// Get the iX type with the same bitwidth as T. 
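/// For example, float maps to i32 and double to i64 (an illustrative, common-target reading; the width is always taken from the target's in-memory value type below).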
IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, const DataLayout &DL) { - EVT VT = TLI->getValueType(DL, T); + EVT VT = TLI->getMemValueType(DL, T); unsigned BitWidth = VT.getStoreSizeInBits(); assert(BitWidth == VT.getSizeInBits() && "must be a power of two"); return IntegerType::get(T->getContext(), BitWidth); @@ -382,7 +381,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { Addr->getType()->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); - auto *NewLI = Builder.CreateLoad(NewAddr); + auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); NewLI->setAlignment(LI->getAlignment()); NewLI->setVolatile(LI->isVolatile()); NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); @@ -431,6 +430,9 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { IRBuilder<> Builder(LI); AtomicOrdering Order = LI->getOrdering(); + if (Order == AtomicOrdering::Unordered) + Order = AtomicOrdering::Monotonic; + Value *Addr = LI->getPointerOperand(); Type *Ty = cast<PointerType>(Addr->getType())->getElementType(); Constant *DummyVal = Constant::getNullValue(Ty); @@ -496,11 +498,26 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, Value *Loaded, Value *NewVal, AtomicOrdering MemOpOrder, Value *&Success, Value *&NewLoaded) { + Type *OrigTy = NewVal->getType(); + + // This code can go away when cmpxchg supports FP types. + bool NeedBitcast = OrigTy->isFloatingPointTy(); + if (NeedBitcast) { + IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); + unsigned AS = Addr->getType()->getPointerAddressSpace(); + Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS)); + NewVal = Builder.CreateBitCast(NewVal, IntTy); + Loaded = Builder.CreateBitCast(Loaded, IntTy); + } + Value* Pair = Builder.CreateAtomicCmpXchg( Addr, Loaded, NewVal, MemOpOrder, AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); Success = Builder.CreateExtractValue(Pair, 1, "success"); NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); + + if (NeedBitcast) + NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy); } /// Emit IR to implement the given atomicrmw operation on values in registers, @@ -535,6 +552,10 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, case AtomicRMWInst::UMin: NewVal = Builder.CreateICmpULE(Loaded, Inc); return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::FAdd: + return Builder.CreateFAdd(Loaded, Inc, "new"); + case AtomicRMWInst::FSub: + return Builder.CreateFSub(Loaded, Inc, "new"); default: llvm_unreachable("Unknown atomic op"); } @@ -564,6 +585,10 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(AI); if (ValueSize < MinCASSize) { + // TODO: Handle atomicrmw fadd/fsub + if (AI->getType()->isFloatingPointTy()) + return false; + expandPartwordAtomicRMW(AI, TargetLoweringBase::AtomicExpansionKind::CmpXChg); } else { @@ -1090,11 +1115,11 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic && SuccessOrder != AtomicOrdering::Monotonic && SuccessOrder != AtomicOrdering::Acquire && - !F->optForMinSize(); + !F->hasMinSize(); // There's no overhead for sinking the release barrier in a weak cmpxchg, so // do it even on minsize. 
- bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak(); + bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak(); // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord // @@ -1533,6 +1558,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) { case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: + case AtomicRMWInst::FAdd: + case AtomicRMWInst::FSub: // No atomic libcalls are available for max/min/umax/umin. return {}; } @@ -1671,16 +1698,25 @@ bool AtomicExpand::expandAtomicOpToLibcall( } // 'ptr' argument. - Value *PtrVal = - Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx)); + // Note: This assumes all address spaces share a common libfunc + // implementation and that addresses are convertible. For systems without + // that property, we'd need to extend this mechanism to support AS-specific + // families of atomic intrinsics. + auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace(); + Value *PtrVal = Builder.CreateBitCast(PointerOperand, + Type::getInt8PtrTy(Ctx, PtrTypeAS)); + PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx)); Args.push_back(PtrVal); // 'expected' argument, if present. if (CASExpected) { AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); AllocaCASExpected->setAlignment(AllocaAlignment); + unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); + AllocaCASExpected_i8 = - Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx)); + Builder.CreateBitCast(AllocaCASExpected, + Type::getInt8PtrTy(Ctx, AllocaAS)); Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64); Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment); Args.push_back(AllocaCASExpected_i8); @@ -1707,8 +1743,9 @@ bool AtomicExpand::expandAtomicOpToLibcall( if (!CASExpected && HasResult && !UseSizedLibcall) { AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); AllocaResult->setAlignment(AllocaAlignment); + unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); AllocaResult_i8 = - Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx)); + Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64); Args.push_back(AllocaResult_i8); } @@ -1734,7 +1771,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( for (Value *Arg : Args) ArgTys.push_back(Arg->getType()); FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false); - Constant *LibcallFn = + FunctionCallee LibcallFn = M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr); CallInst *Call = Builder.CreateCall(LibcallFn, Args); Call->setAttributes(Attr); @@ -1749,8 +1786,8 @@ bool AtomicExpand::expandAtomicOpToLibcall( // from call} Type *FinalResultTy = I->getType(); Value *V = UndefValue::get(FinalResultTy); - Value *ExpectedOut = - Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment); + Value *ExpectedOut = Builder.CreateAlignedLoad( + CASExpected->getType(), AllocaCASExpected, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64); V = Builder.CreateInsertValue(V, ExpectedOut, 0); V = Builder.CreateInsertValue(V, Result, 1); @@ -1760,7 +1797,8 @@ bool AtomicExpand::expandAtomicOpToLibcall( if (UseSizedLibcall) V = Builder.CreateBitOrPointerCast(Result, I->getType()); else { - V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment); + V = Builder.CreateAlignedLoad(I->getType(),
AllocaResult, + AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64); } I->replaceAllUsesWith(V); diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index d11f375b176e..57cefae2066a 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -1,9 +1,8 @@ //===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index efbfd5f4ab2c..fb54b5d6c8d8 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1,9 +1,8 @@ //===- BranchFolding.cpp - Fold machine code branch instructions ----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -722,7 +721,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); - return EffectiveTailLen >= 2 && MF->getFunction().optForSize() && + return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() && (I1 == MBB1->begin() || I2 == MBB2->begin()); } @@ -1071,31 +1070,29 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { bool MadeChange = false; - if (!EnableTailMerge) return MadeChange; + if (!EnableTailMerge) + return MadeChange; // First find blocks with no successors. - // Block placement does not create new tail merging opportunities for these - // blocks. - if (!AfterBlockPlacement) { - MergePotentials.clear(); - for (MachineBasicBlock &MBB : MF) { - if (MergePotentials.size() == TailMergeThreshold) - break; - if (!TriedMerging.count(&MBB) && MBB.succ_empty()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB)); - } - - // If this is a large problem, avoid visiting the same basic blocks - // multiple times. + // Block placement may create new tail merging opportunities for these blocks. + MergePotentials.clear(); + for (MachineBasicBlock &MBB : MF) { if (MergePotentials.size() == TailMergeThreshold) - for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) - TriedMerging.insert(MergePotentials[i].getBlock()); - - // See if we can do any tail merging on those. - if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength); + break; + if (!TriedMerging.count(&MBB) && MBB.succ_empty()) + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB)); } + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. 
+ if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); + + // See if we can do any tail merging on those. + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength); + // Look at blocks (IBB) with multiple predecessors (PBB). // We change each predecessor to a canonical form, by // (1) temporarily removing any unconditional branch from the predecessor @@ -1183,29 +1180,6 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { } } - // Failing case: the only way IBB can be reached from PBB is via - // exception handling. Happens for landing pads. Would be nice to have - // a bit in the edge so we didn't have to do all this. - if (IBB->isEHPad()) { - MachineFunction::iterator IP = ++PBB->getIterator(); - MachineBasicBlock *PredNextBB = nullptr; - if (IP != MF.end()) - PredNextBB = &*IP; - if (!TBB) { - if (IBB != PredNextBB) // fallthrough - continue; - } else if (FBB) { - if (TBB != IBB && FBB != IBB) // cbr then ubr - continue; - } else if (Cond.empty()) { - if (TBB != IBB) // ubr - continue; - } else { - if (TBB != IBB && IBB != PredNextBB) // cbr - continue; - } - } - // Remove the unconditional branch at the end, if any. if (TBB && (Cond.empty() || FBB)) { DebugLoc dl = PBB->findBranchDebugLoc(); @@ -1598,7 +1572,7 @@ ReoptimizeBlock: } if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && - MF.getFunction().optForSize()) { + MF.getFunction().hasOptSize()) { // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch // direction, thereby defeating careful block placement and regressing // performance. Therefore, only consider this for optsize functions. diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index accd0ab7317b..761ff9c7d54e 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -1,9 +1,8 @@ //===- BranchFolding.h - Fold machine code branch instructions --*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp index c092da2b6602..3ad6266d4f35 100644 --- a/lib/CodeGen/BranchRelaxation.cpp +++ b/lib/CodeGen/BranchRelaxation.cpp @@ -1,9 +1,8 @@ //===- BranchRelaxation.cpp -----------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/BreakFalseDeps.cpp b/lib/CodeGen/BreakFalseDeps.cpp index 210699cbf239..cc4b2caa9bed 100644 --- a/lib/CodeGen/BreakFalseDeps.cpp +++ b/lib/CodeGen/BreakFalseDeps.cpp @@ -1,9 +1,8 @@ //==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp index 93939e573b7b..bfc10cb3fef2 100644 --- a/lib/CodeGen/BuiltinGCs.cpp +++ b/lib/CodeGen/BuiltinGCs.cpp @@ -1,9 +1,8 @@ //===- BuiltinGCs.cpp - Boilerplate for our built in GC types -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CFIInstrInserter.cpp b/lib/CodeGen/CFIInstrInserter.cpp index c4799855a2b3..1a4d54231cfd 100644 --- a/lib/CodeGen/CFIInstrInserter.cpp +++ b/lib/CodeGen/CFIInstrInserter.cpp @@ -1,9 +1,8 @@ //===------ CFIInstrInserter.cpp - Insert additional CFI instructions -----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 02347b9f0b5c..7164fdfb7886 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -1,9 +1,8 @@ //===- CalcSpillWeights.cpp -----------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 3593089b206d..497fcb147849 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -1,9 +1,8 @@ //===-- CallingConvLower.cpp - Calling Conventions ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 66166482c78b..c37ed57781d4 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -1,9 +1,8 @@ //===-- CodeGen.cpp -------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -31,14 +30,15 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeEarlyIfConverterPass(Registry); initializeEarlyMachineLICMPass(Registry); initializeEarlyTailDuplicatePass(Registry); - initializeExpandISelPseudosPass(Registry); initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); initializeFEntryInserterPass(Registry); + initializeFinalizeISelPass(Registry); initializeFinalizeMachineBundlesPass(Registry); initializeFuncletLayoutPass(Registry); initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); + initializeHardwareLoopsPass(Registry); initializeIfConverterPass(Registry); initializeImplicitNullChecksPass(Registry); initializeIndirectBrExpandPassPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index c35f8666fa3c..52b4bbea012b 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -1,9 +1,8 @@ //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -16,6 +15,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -32,6 +32,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -292,15 +293,16 @@ class TypePromotionTransaction; /// Keep track of SExt promoted. ValueToSExts ValToSExtendedUses; - /// True if CFG is modified in any way. - bool ModifiedDT; - /// True if optimizing for size. bool OptSize; /// DataLayout for the Function being processed. const DataLayout *DL = nullptr; + /// Building the dominator tree can be expensive, so we only build it + /// lazily and update it when required. 
+ std::unique_ptr<DominatorTree> DT; + public: static char ID; // Pass identification, replacement for typeid @@ -339,6 +341,13 @@ class TypePromotionTransaction; } } + // Get the DominatorTree, building if necessary. + DominatorTree &getDT(Function &F) { + if (!DT) + DT = llvm::make_unique<DominatorTree>(F); + return *DT; + } + bool eliminateFallThrough(Function &F); bool eliminateMostlyEmptyBlocks(Function &F); BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); @@ -355,11 +364,12 @@ class TypePromotionTransaction; bool optimizeExt(Instruction *&I); bool optimizeExtUses(Instruction *I); bool optimizeLoadExt(LoadInst *Load); + bool optimizeShiftInst(BinaryOperator *BO); bool optimizeSelectInst(SelectInst *SI); bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); bool optimizeSwitchInst(SwitchInst *SI); bool optimizeExtractElementInst(Instruction *Inst); - bool dupRetToEnableTailCallOpts(BasicBlock *BB); + bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT); bool placeDbgValues(Function &F); bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI, Instruction *&Inst, bool HasPromoted); @@ -374,8 +384,15 @@ class TypePromotionTransaction; bool AllowPromotionWithoutCommonHeader, bool HasPromoted, TypePromotionTransaction &TPT, SmallVectorImpl<Instruction *> &SpeculativelyMovedExts); - bool splitBranchCondition(Function &F); + bool splitBranchCondition(Function &F, bool &ModifiedDT); bool simplifyOffsetableRelocate(Instruction &I); + + bool tryToSinkFreeOperands(Instruction *I); + bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp, + Intrinsic::ID IID); + bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT); + bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT); + bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT); }; } // end anonymous namespace @@ -401,7 +418,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { InsertedInsts.clear(); PromotedInsts.clear(); - ModifiedDT = false; if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) { TM = &TPC->getTM<TargetMachine>(); SubtargetInfo = TM->getSubtargetImpl(F); @@ -413,7 +429,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); - OptSize = F.optForSize(); + OptSize = F.hasOptSize(); ProfileSummaryInfo *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); @@ -444,8 +460,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // unconditional branch. EverMadeChange |= eliminateMostlyEmptyBlocks(F); + bool ModifiedDT = false; if (!DisableBranchOpts) - EverMadeChange |= splitBranchCondition(F); + EverMadeChange |= splitBranchCondition(F, ModifiedDT); // Split some critical edges where one of the sources is an indirect branch, // to help generate sane code for PHIs involving such edges. @@ -454,6 +471,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { bool MadeChange = true; while (MadeChange) { MadeChange = false; + DT.reset(); for (Function::iterator I = F.begin(); I != F.end(); ) { BasicBlock *BB = &*I++; bool ModifiedDTOnIteration = false; @@ -654,6 +672,16 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, BB->getSinglePredecessor()->getSingleSuccessor())) return false; + // Skip merging if the block's successor is also a successor to any callbr + // that leads to this block. + // FIXME: Is this really needed? Is this a correctness issue? 
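+ // (One plausible reading: callbr models inline-asm goto, whose successors
+ // are referenced as labels from the assembly text, so we conservatively
+ // keep such blocks distinct.)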
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator())) + for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i) + if (DestBB == CBI->getSuccessor(i)) + return false; + } + // Try to skip merging if the unique predecessor of BB is terminated by a // switch or indirect branch instruction, and BB is used as an incoming block // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to @@ -1040,7 +1068,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { return MadeChange; } -/// SinkCast - Sink the specified cast instruction into its user blocks +/// Sink the specified cast instruction into its user blocks. static bool SinkCast(CastInst *CI) { BasicBlock *DefBB = CI->getParent(); @@ -1114,8 +1142,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, // Sink only "cheap" (or nop) address-space casts. This is a weaker condition // than sinking only nop casts, but is helpful on some platforms. if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) { - if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(), - ASC->getDestAddressSpace())) + if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(), + ASC->getDestAddressSpace())) return false; } @@ -1148,54 +1176,169 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, return SinkCast(CI); } -/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if -/// possible. -/// -/// Return true if any changes were made. -static bool CombineUAddWithOverflow(CmpInst *CI) { - Value *A, *B; - Instruction *AddI; - if (!match(CI, - m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI)))) +bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, + CmpInst *Cmp, + Intrinsic::ID IID) { + if (BO->getParent() != Cmp->getParent()) { + // We used to use a dominator tree here to allow multi-block optimization. + // But that was problematic because: + // 1. It could cause a perf regression by hoisting the math op into the + // critical path. + // 2. It could cause a perf regression by creating a value that was live + // across multiple blocks and increasing register pressure. + // 3. Use of a dominator tree could cause large compile-time regression. + // This is because we recompute the DT on every change in the main CGP + // run-loop. The recomputing is probably unnecessary in many cases, so if + // that was fixed, using a DT here would be ok. + return false; + } + + // We allow matching the canonical IR (add X, C) back to (usubo X, -C). + Value *Arg0 = BO->getOperand(0); + Value *Arg1 = BO->getOperand(1); + if (BO->getOpcode() == Instruction::Add && + IID == Intrinsic::usub_with_overflow) { + assert(isa<Constant>(Arg1) && "Unexpected input for usubo"); + Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1)); + } + + // Insert at the first instruction of the pair. 
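+ // Overall, for IID = uadd.with.overflow and i32 operands the pair is
+ // rewritten as follows (illustrative value names; one of several compare
+ // shapes the callers match):
+ //   %math = add i32 %a, %b
+ //   %ov = icmp ult i32 %math, %a
+ // becomes
+ //   %m = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ //   %math = extractvalue { i32, i1 } %m, 0
+ //   %ov = extractvalue { i32, i1 } %m, 1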
+ Instruction *InsertPt = nullptr; + for (Instruction &Iter : *Cmp->getParent()) { + if (&Iter == BO || &Iter == Cmp) { + InsertPt = &Iter; + break; + } + } + assert(InsertPt != nullptr && "Parent block did not contain cmp or binop"); + + IRBuilder<> Builder(InsertPt); + Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1); + Value *Math = Builder.CreateExtractValue(MathOV, 0, "math"); + Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov"); + BO->replaceAllUsesWith(Math); + Cmp->replaceAllUsesWith(OV); + BO->eraseFromParent(); + Cmp->eraseFromParent(); + return true; +} + +/// Match special-case patterns that check for unsigned add overflow. +static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, + BinaryOperator *&Add) { + // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val) + // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero) + Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1); + + // We are not expecting non-canonical/degenerate code. Just bail out. + if (isa<Constant>(A)) + return false; + + ICmpInst::Predicate Pred = Cmp->getPredicate(); + if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes())) + B = ConstantInt::get(B->getType(), 1); + else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) + B = ConstantInt::get(B->getType(), -1); + else return false; - Type *Ty = AddI->getType(); - if (!isa<IntegerType>(Ty)) + // Check the users of the variable operand of the compare looking for an add + // with the adjusted constant. + for (User *U : A->users()) { + if (match(U, m_Add(m_Specific(A), m_Specific(B)))) { + Add = cast<BinaryOperator>(U); + return true; + } + } + return false; +} + +/// Try to combine the compare into a call to the llvm.uadd.with.overflow +/// intrinsic. Return true if any changes were made. +bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp, + bool &ModifiedDT) { + Value *A, *B; + BinaryOperator *Add; + if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) + if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add)) + return false; + + if (!TLI->shouldFormOverflowOp(ISD::UADDO, + TLI->getValueType(*DL, Add->getType()))) return false; - // We don't want to move around uses of condition values this late, so we we + // We don't want to move around uses of condition values this late, so we // check if it is legal to create the call to the intrinsic in the basic - // block containing the icmp: + // block containing the icmp. + if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse()) + return false; - if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse()) + if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow)) return false; -#ifndef NDEBUG - // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption - // for now: - if (AddI->hasOneUse()) - assert(*AddI->user_begin() == CI && "expected!"); -#endif + // Reset callers - do not crash by iterating over a dead instruction. + ModifiedDT = true; + return true; +} + +bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp, + bool &ModifiedDT) { + // We are not expecting non-canonical/degenerate code. Just bail out. + Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1); + if (isa<Constant>(A) && isa<Constant>(B)) + return false; + + // Convert (A u> B) to (A u< B) to simplify pattern matching. + ICmpInst::Predicate Pred = Cmp->getPredicate(); + if (Pred == ICmpInst::ICMP_UGT) { + std::swap(A, B); + Pred = ICmpInst::ICMP_ULT; + } + // Convert special-case: (A == 0) is the same as (A u< 1). 
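+ // (An unsigned value is below 1 exactly when it is zero, and
+ // usub.with.overflow(A, 1) overflows exactly in that case.)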
+ if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) { + B = ConstantInt::get(B->getType(), 1); + Pred = ICmpInst::ICMP_ULT; + } + // Convert special-case: (A != 0) is the same as (0 u< A). + if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) { + std::swap(A, B); + Pred = ICmpInst::ICMP_ULT; + } + if (Pred != ICmpInst::ICMP_ULT) + return false; + + // Walk the users of a variable operand of a compare looking for a subtract or + // add with that same operand. Also match the 2nd operand of the compare to + // the add/sub, but that may be a negated constant operand of an add. + Value *CmpVariableOperand = isa<Constant>(A) ? B : A; + BinaryOperator *Sub = nullptr; + for (User *U : CmpVariableOperand->users()) { + // A - B, A u< B --> usubo(A, B) + if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) { + Sub = cast<BinaryOperator>(U); + break; + } + + // A + (-C), A u< C (canonicalized form of (sub A, C)) + const APInt *CmpC, *AddC; + if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) && + match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) { + Sub = cast<BinaryOperator>(U); + break; + } + } + if (!Sub) + return false; + + if (!TLI->shouldFormOverflowOp(ISD::USUBO, + TLI->getValueType(*DL, Sub->getType()))) + return false; + + if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow)) + return false; - Module *M = CI->getModule(); - Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); - - auto *InsertPt = AddI->hasOneUse() ? CI : AddI; - - DebugLoc Loc = CI->getDebugLoc(); - auto *UAddWithOverflow = - CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt); - UAddWithOverflow->setDebugLoc(Loc); - auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt); - UAdd->setDebugLoc(Loc); - auto *Overflow = - ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt); - Overflow->setDebugLoc(Loc); - - CI->replaceAllUsesWith(Overflow); - AddI->replaceAllUsesWith(UAdd); - CI->eraseFromParent(); - AddI->eraseFromParent(); + // Reset callers - do not crash by iterating over a dead instruction. + ModifiedDT = true; return true; } @@ -1205,18 +1348,19 @@ static bool CombineUAddWithOverflow(CmpInst *CI) { /// lose; some adjustment may be wanted there. /// /// Return true if any changes are made. -static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) { - BasicBlock *DefBB = CI->getParent(); +static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { + if (TLI.hasMultipleConditionRegisters()) + return false; // Avoid sinking soft-FP comparisons, since this can move them into a loop. - if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI)) + if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp)) return false; // Only insert a cmp in each block once. DenseMap<BasicBlock*, CmpInst*> InsertedCmps; bool MadeChange = false; - for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); + for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end(); UI != E; ) { Use &TheUse = UI.getUse(); Instruction *User = cast<Instruction>(*UI); @@ -1230,6 +1374,7 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) { // Figure out which BB this cmp is used in. BasicBlock *UserBB = User->getParent(); + BasicBlock *DefBB = Cmp->getParent(); // If this user is in the same block as the cmp, don't change the cmp. 
if (UserBB == DefBB) continue; @@ -1241,10 +1386,11 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); assert(InsertPt != UserBB->end()); InsertedCmp = - CmpInst::Create(CI->getOpcode(), CI->getPredicate(), - CI->getOperand(0), CI->getOperand(1), "", &*InsertPt); + CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), + Cmp->getOperand(0), Cmp->getOperand(1), "", + &*InsertPt); // Propagate the debug info. - InsertedCmp->setDebugLoc(CI->getDebugLoc()); + InsertedCmp->setDebugLoc(Cmp->getDebugLoc()); } // Replace a use of the cmp with a use of the new cmp. @@ -1254,19 +1400,22 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) { } // If we removed all uses, nuke the cmp. - if (CI->use_empty()) { - CI->eraseFromParent(); + if (Cmp->use_empty()) { + Cmp->eraseFromParent(); MadeChange = true; } return MadeChange; } -static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) { - if (SinkCmpExpression(CI, TLI)) +bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) { + if (sinkCmpExpression(Cmp, *TLI)) return true; - if (CombineUAddWithOverflow(CI)) + if (combineToUAddWithOverflow(Cmp, ModifiedDT)) + return true; + + if (combineToUSubWithOverflow(Cmp, ModifiedDT)) return true; return false; @@ -1301,7 +1450,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI, for (auto *U : AndI->users()) { Instruction *User = cast<Instruction>(U); - // Only sink for and mask feeding icmp with 0. + // Only sink 'and' feeding icmp with 0. if (!isa<ICmpInst>(User)) return false; @@ -1704,9 +1853,23 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { if (II) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::experimental_widenable_condition: { + // Give up on future widening opportunities so that we can fold away dead + // paths and merge blocks before going into block-local instruction + // selection. + if (II->use_empty()) { + II->eraseFromParent(); + return true; + } + Constant *RetVal = ConstantInt::getTrue(II->getContext()); + resetIteratorIfInvalidatedWhileCalling(BB, [&]() { + replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); + }); + return true; + } case Intrinsic::objectsize: { // Lower all uses of llvm.objectsize.* - ConstantInt *RetVal = + Value *RetVal = lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); resetIteratorIfInvalidatedWhileCalling(BB, [&]() { @@ -1735,6 +1898,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { InsertedInsts.insert(ExtVal); return true; } + case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: { Value *ArgVal = II->getArgOperand(0); @@ -1818,7 +1982,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { /// %tmp2 = tail call i32 @f2() /// ret i32 %tmp2 /// @endcode -bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { +bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) { if (!TLI) return false; @@ -1846,10 +2010,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { // return is the first instruction in the block. if (PN) { BasicBlock::iterator BI = BB->begin(); - do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); - if (&*BI == BCI) - // Also skip over the bitcast. - ++BI; + // Skip over debug intrinsics and the bitcast.
+ do { ++BI; } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI); if (&*BI != RetI) return false; } else { @@ -1865,7 +2027,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { SmallVector<CallInst*, 4> TailCalls; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { - CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); + // Look through bitcasts. + Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts(); + CallInst *CI = dyn_cast<CallInst>(IncomingVal); // Make sure the phi value is indeed produced by the tail call. if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && TLI->mayBeEmittedAsTailCall(CI) && @@ -1929,6 +2093,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode { Value *BaseReg = nullptr; Value *ScaledReg = nullptr; Value *OriginalValue = nullptr; + bool InBounds = true; enum FieldName { NoField = 0x00, @@ -1940,6 +2105,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode { MultipleFields = 0xff }; + ExtAddrMode() = default; void print(raw_ostream &OS) const; @@ -1958,6 +2124,10 @@ struct ExtAddrMode : public TargetLowering::AddrMode { ScaledReg->getType() != other.ScaledReg->getType()) return MultipleFields; + // Conservatively reject 'inbounds' mismatches. + if (InBounds != other.InBounds) + return MultipleFields; + // Check each field to see if it differs. unsigned Result = NoField; if (BaseReg != other.BaseReg) @@ -2056,6 +2226,8 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { void ExtAddrMode::print(raw_ostream &OS) const { bool NeedPlus = false; OS << "["; + if (InBounds) + OS << "inbounds "; if (BaseGV) { OS << (NeedPlus ? " + " : "") << "GV:"; @@ -3126,6 +3298,8 @@ private: PhiNodeSet &PhiNodesToMatch) { SmallVector<PHIPair, 8> WorkList; Matcher.insert({ PHI, Candidate }); + SmallSet<PHINode *, 8> MatchedPHIs; + MatchedPHIs.insert(PHI); WorkList.push_back({ PHI, Candidate }); SmallSet<PHIPair, 8> Visited; while (!WorkList.empty()) { @@ -3158,8 +3332,10 @@ private: if (Matcher.count({ FirstPhi, SecondPhi })) continue; // So the values are different and do not match. So we need them to - // match. - Matcher.insert({ FirstPhi, SecondPhi }); + // match. (But we register no more than one match per PHI node, so that + // we won't later try to replace them twice.) + if (MatchedPHIs.insert(FirstPhi).second) + Matcher.insert({ FirstPhi, SecondPhi }); // But we must check it. WorkList.push_back({ FirstPhi, SecondPhi }); } @@ -3354,6 +3530,7 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, ConstantInt *CI = nullptr; Value *AddLHS = nullptr; if (isa<Instruction>(ScaleReg) && // not a constant expr. match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { + TestAddrMode.InBounds = false; TestAddrMode.ScaledReg = AddLHS; TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; @@ -3928,6 +4105,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); + AddrMode.InBounds = false; if (matchAddr(AddrInst->getOperand(1), Depth+1) && matchAddr(AddrInst->getOperand(0), Depth+1)) return true; @@ -3954,6 +4132,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, case Instruction::Mul: case Instruction::Shl: { // Can only handle X*C and X << C.
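+ // (Illustrative case: an index computed as mul i64 %i, 4 or shl i64 %i, 2
+ // folds to Scale = 4 on %i; whether that scaled mode is legal is decided by
+ // the target's addressing-mode check, not here.)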
+ AddrMode.InBounds = false; ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); if (!RHS || RHS->getBitWidth() > 64) return false; @@ -4005,8 +4184,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, if (ConstantOffset == 0 || TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) { // Check to see if we can fold the base pointer in too. - if (matchAddr(AddrInst->getOperand(0), Depth+1)) + if (matchAddr(AddrInst->getOperand(0), Depth+1)) { + if (!cast<GEPOperator>(AddrInst)->isInBounds()) + AddrMode.InBounds = false; return true; + } } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) && TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 && ConstantOffset > 0) { @@ -4020,15 +4202,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, if (isa<Argument>(Base) || isa<GlobalValue>(Base) || (BaseI && !isa<CastInst>(BaseI) && !isa<GetElementPtrInst>(BaseI))) { - // If the base is an instruction, make sure the GEP is not in the same - // basic block as the base. If the base is an argument or global - // value, make sure the GEP is not in the entry block. Otherwise, - // instruction selection can undo the split. Also make sure the - // parent block allows inserting non-PHI instructions before the - // terminator. + // Make sure the parent block allows inserting non-PHI instructions + // before the terminator. BasicBlock *Parent = BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock(); - if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad()) + if (!Parent->getTerminator()->isEHPad()) LargeOffsetGEP = std::make_pair(GEP, ConstantOffset); } } @@ -4042,6 +4220,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, // See if the scale and offset amount is valid for this target. AddrMode.BaseOffs += ConstantOffset; + if (!cast<GEPOperator>(AddrInst)->isInBounds()) + AddrMode.InBounds = false; // Match the base operand of the GEP. if (!matchAddr(AddrInst->getOperand(0), Depth+1)) { @@ -4268,7 +4448,7 @@ static bool FindAllMemoryUses( if (!MightBeFoldableInst(I)) return true; - const bool OptSize = I->getFunction()->optForSize(); + const bool OptSize = I->getFunction()->hasOptSize(); // Loop over all the uses, recursively processing them. for (Use &U : I->uses()) { @@ -4556,8 +4736,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); GetElementPtrInst *GEP = LargeOffsetGEP.first; - if (GEP && GEP->getParent() != MemoryInst->getParent() && - !NewGEPBases.count(GEP)) { + if (GEP && !NewGEPBases.count(GEP)) { // If splitting the underlying data structure can reduce the offset of a // GEP, collect the GEP. Skip the GEPs that are the new bases of // previously split data structures. @@ -4727,7 +4906,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // SDAG consecutive load/store merging. if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); - ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); + ResultPtr = + AddrMode.InBounds + ? 
Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex, + "sunkaddr") + : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } ResultIndex = V; @@ -4738,7 +4921,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } else { if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); - SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); + SunkAddr = + AddrMode.InBounds + ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex, + "sunkaddr") + : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } if (SunkAddr->getType() != Addr->getType()) @@ -5037,7 +5224,6 @@ bool CodeGenPrepare::tryToPromoteExts( /// Merging redundant sexts when one is dominating the other. bool CodeGenPrepare::mergeSExts(Function &F) { - DominatorTree DT(F); bool Changed = false; for (auto &Entry : ValToSExtendedUses) { SExts &Insts = Entry.second; @@ -5048,7 +5234,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) { continue; bool inserted = false; for (auto &Pt : CurPts) { - if (DT.dominates(Inst, Pt)) { + if (getDT(F).dominates(Inst, Pt)) { Pt->replaceAllUsesWith(Inst); RemovedInsts.insert(Pt); Pt->removeFromParent(); @@ -5057,7 +5243,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) { Changed = true; break; } - if (!DT.dominates(Pt, Inst)) + if (!getDT(F).dominates(Pt, Inst)) // Give up if we need to merge in a common dominator as the // experiments show it is not profitable. continue; @@ -5715,7 +5901,7 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, static Value *getTrueOrFalseValue( SelectInst *SI, bool isTrue, const SmallPtrSet<const Instruction *, 2> &Selects) { - Value *V; + Value *V = nullptr; for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); DefSI = dyn_cast<SelectInst>(V)) { @@ -5723,9 +5909,44 @@ static Value *getTrueOrFalseValue( "The condition of DefSI does not match with SI"); V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); } + + assert(V && "Failed to get select true/false value"); return V; } +bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) { + assert(Shift->isShift() && "Expected a shift"); + + // If this is (1) a vector shift, (2) shifts by scalars are cheaper than + // general vector shifts, and (3) the shift amount is a select-of-splatted + // values, hoist the shifts before the select: + // shift Op0, (select Cond, TVal, FVal) --> + // select Cond, (shift Op0, TVal), (shift Op0, FVal) + // + // This is inverting a generic IR transform when we know that the cost of a + // general vector shift is more than the cost of 2 shift-by-scalars. + // We can't do this effectively in SDAG because we may not be able to + // determine if the select operands are splats from within a basic block. 
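+ // An illustrative <4 x i32> case (pseudo-IR):
+ //   %amt = select i1 %c, <4 x i32> <splat 2>, <4 x i32> <splat 3>
+ //   %r = shl <4 x i32> %x, %amt
+ // becomes
+ //   %r = select i1 %c, (shl %x, <splat 2>), (shl %x, <splat 3>)
+ // where each arm now shifts by an amount known to be a splat.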
+ Type *Ty = Shift->getType(); + if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty)) + return false; + Value *Cond, *TVal, *FVal; + if (!match(Shift->getOperand(1), + m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal))))) + return false; + if (!isSplatValue(TVal) || !isSplatValue(FVal)) + return false; + + IRBuilder<> Builder(Shift); + BinaryOperator::BinaryOps Opcode = Shift->getOpcode(); + Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal); + Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal); + Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal); + Shift->replaceAllUsesWith(NewSel); + Shift->eraseFromParent(); + return true; +} + /// If we have a SelectInst that will likely profit from branch prediction, /// turn it into a branch. bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { @@ -5769,7 +5990,11 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { !isFormingBranchFromSelectProfitable(TTI, TLI, SI)) return false; - ModifiedDT = true; + // The DominatorTree needs to be rebuilt by any consumers after this + // transformation. We simply reset here rather than setting the ModifiedDT + // flag to avoid restarting the function walk in runOnFunction for each + // select optimized. + DT.reset(); // Transform a sequence like this: // start: @@ -5943,6 +6168,7 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), SVI->getOperand(2), "", &*InsertPt); + InsertedShuffle->setDebugLoc(SVI->getDebugLoc()); } UI->replaceUsesOfWith(SVI, InsertedShuffle); @@ -5958,6 +6184,48 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { return MadeChange; } +bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { + // If the operands of I can be folded into a target instruction together with + // I, duplicate and sink them. + SmallVector<Use *, 4> OpsToSink; + if (!TLI || !TLI->shouldSinkOperands(I, OpsToSink)) + return false; + + // OpsToSink can contain multiple uses in a use chain (e.g. + // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating + // uses must come first, which means they are sunk first, temporarily creating + // invalid IR. This will be fixed once their dominated users are sunk and + // updated. + BasicBlock *TargetBB = I->getParent(); + bool Changed = false; + SmallVector<Use *, 4> ToReplace; + for (Use *U : OpsToSink) { + auto *UI = cast<Instruction>(U->get()); + if (UI->getParent() == TargetBB || isa<PHINode>(UI)) + continue; + ToReplace.push_back(U); + } + + SmallPtrSet<Instruction *, 4> MaybeDead; + for (Use *U : ToReplace) { + auto *UI = cast<Instruction>(U->get()); + Instruction *NI = UI->clone(); + MaybeDead.insert(UI); + LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n"); + NI->insertBefore(I); + InsertedInsts.insert(NI); + U->set(NI); + Changed = true; + } + + // Remove instructions that are dead after sinking. + for (auto *I : MaybeDead) + if (!I->hasNUsesOrMore(1)) + I->eraseFromParent(); + + return Changed; +} + bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { if (!TLI || !DL) return false; @@ -6412,14 +6680,17 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI) { // Handle simple but common cases only. 
Type *StoreType = SI.getValueOperand()->getType(); - if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) || + if (!DL.typeSizeEqualsStoreSize(StoreType) || DL.getTypeSizeInBits(StoreType) == 0) return false; unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2; Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize); - if (DL.getTypeStoreSizeInBits(SplitStoreType) != - DL.getTypeSizeInBits(SplitStoreType)) + if (!DL.typeSizeEqualsStoreSize(SplitStoreType)) + return false; + + // Don't split the store if it is volatile. + if (SI.isVolatile()) return false; // Match the following patterns: @@ -6658,11 +6929,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { if (InsertedInsts.count(I)) return false; + // TODO: Move into the switch on opcode below here. if (PHINode *P = dyn_cast<PHINode>(I)) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) { + LargeOffsetGEPMap.erase(P); P->replaceAllUsesWith(V); P->eraseFromParent(); ++NumPHIsElim; @@ -6700,9 +6973,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { return false; } - if (CmpInst *CI = dyn_cast<CmpInst>(I)) - if (!TLI || !TLI->hasMultipleConditionRegisters()) - return OptimizeCmpExpression(CI, TLI); + if (auto *Cmp = dyn_cast<CmpInst>(I)) + if (TLI && optimizeCmp(Cmp, ModifiedDT)) + return true; if (LoadInst *LI = dyn_cast<LoadInst>(I)) { LI->setMetadata(LLVMContext::MD_invariant_group, nullptr); @@ -6745,13 +7018,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { EnableAndCmpSinking && TLI) return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts); + // TODO: Move this into the switch on opcode - it handles shifts already. 
if (BinOp && (BinOp->getOpcode() == Instruction::AShr || BinOp->getOpcode() == Instruction::LShr)) { ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)); if (TLI && CI && TLI->hasExtractBitsInsn()) - return OptimizeExtractBits(BinOp, CI, *TLI, *DL); - - return false; + if (OptimizeExtractBits(BinOp, CI, *TLI, *DL)) + return true; } if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { @@ -6772,20 +7045,25 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { return false; } - if (CallInst *CI = dyn_cast<CallInst>(I)) - return optimizeCallInst(CI, ModifiedDT); - - if (SelectInst *SI = dyn_cast<SelectInst>(I)) - return optimizeSelectInst(SI); - - if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) - return optimizeShuffleVectorInst(SVI); - - if (auto *Switch = dyn_cast<SwitchInst>(I)) - return optimizeSwitchInst(Switch); + if (tryToSinkFreeOperands(I)) + return true; - if (isa<ExtractElementInst>(I)) - return optimizeExtractElementInst(I); + switch (I->getOpcode()) { + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + return optimizeShiftInst(cast<BinaryOperator>(I)); + case Instruction::Call: + return optimizeCallInst(cast<CallInst>(I), ModifiedDT); + case Instruction::Select: + return optimizeSelectInst(cast<SelectInst>(I)); + case Instruction::ShuffleVector: + return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I)); + case Instruction::Switch: + return optimizeSwitchInst(cast<SwitchInst>(I)); + case Instruction::ExtractElement: + return optimizeExtractElementInst(cast<ExtractElementInst>(I)); + } return false; } @@ -6833,7 +7111,7 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { } } } - MadeChange |= dupRetToEnableTailCallOpts(&BB); + MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT); return MadeChange; } @@ -6909,7 +7187,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { /// /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. /// -bool CodeGenPrepare::splitBranchCondition(Function &F) { +bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive()) return false; @@ -6983,11 +7261,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { std::swap(TBB, FBB); // Replace the old BB with the new BB. - for (PHINode &PN : TBB->phis()) { - int i; - while ((i = PN.getBasicBlockIndex(&BB)) >= 0) - PN.setIncomingBlock(i, TmpBB); - } + TBB->replacePhiUsesWith(&BB, TmpBB); // Add another incoming edge form the new BB. for (PHINode &PN : FBB->phis()) { @@ -7066,10 +7340,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } } - // Note: No point in getting fancy here, since the DT info is never - // available to CodeGenPrepare. ModifiedDT = true; - MadeChange = true; LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 5a5960b16130..4144c243a341 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -1,9 +1,8 @@ //===- CriticalAntiDepBreaker.cpp - Anti-dep breaker ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 09c4423a2f05..4e127ce525c8 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -1,9 +1,8 @@ //===- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index 68034afe98d5..b99be5d7a87c 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -1,9 +1,8 @@ //=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // This class implements a deterministic finite automaton (DFA) based diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index ff44c5660bad..049ce7063307 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -1,9 +1,8 @@ //===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -82,9 +81,11 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) return false; } else { - if (!MRI->use_nodbg_empty(Reg)) - // This def has a non-debug use. Don't delete the instruction! - return false; + for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) { + if (&Use != MI) + // This def has a non-debug use. Don't delete the instruction! + return false; + } } } } diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp index c83db476a4de..fe78acf4d80a 100644 --- a/lib/CodeGen/DetectDeadLanes.cpp +++ b/lib/CodeGen/DetectDeadLanes.cpp @@ -1,9 +1,8 @@ //===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 4586649d17f0..ddd6cec5a178 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -1,9 +1,8 @@ //===- DwarfEHPrepare - Prepare exception handling for code generation ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -46,7 +45,7 @@ namespace { class DwarfEHPrepare : public FunctionPass { // RewindFunction - _Unwind_Resume or the target equivalent. - Constant *RewindFunction = nullptr; + FunctionCallee RewindFunction = nullptr; DominatorTree *DT = nullptr; const TargetLowering *TLI = nullptr; @@ -146,7 +145,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes( size_t ResumeIndex = 0; for (auto *RI : Resumes) { for (auto *LP : CleanupLPads) { - if (isPotentiallyReachable(LP, RI, DT)) { + if (isPotentiallyReachable(LP, RI, nullptr, DT)) { ResumeReachable.set(ResumeIndex); break; } diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 364e1f030942..0a83760befaa 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -1,9 +1,8 @@ //===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index 54c53eb16312..486720cadd27 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -1,9 +1,8 @@ //===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -28,7 +27,7 @@ ViewEdgeBundles("view-edge-bundles", cl::Hidden, char EdgeBundles::ID = 0; INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges", - /* cfg = */true, /* analysis = */ true) + /* cfg = */true, /* is_analysis = */ true) char &llvm::EdgeBundlesID = EdgeBundles::ID; diff --git a/lib/CodeGen/ExecutionDomainFix.cpp b/lib/CodeGen/ExecutionDomainFix.cpp index 458dcf2b0e26..a2dd5eee33b7 100644 --- a/lib/CodeGen/ExecutionDomainFix.cpp +++ b/lib/CodeGen/ExecutionDomainFix.cpp @@ -1,9 +1,8 @@ //===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -337,11 +336,10 @@ void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { } // Sorted insertion. // Enables giving priority to the latest domains during merging. - auto I = std::upper_bound( - Regs.begin(), Regs.end(), rx, [&](int LHS, const int RHS) { - return RDA->getReachingDef(mi, RC->getRegister(LHS)) < - RDA->getReachingDef(mi, RC->getRegister(RHS)); - }); + const int Def = RDA->getReachingDef(mi, RC->getRegister(rx)); + auto I = partition_point(Regs, [&](int I) { + return RDA->getReachingDef(mi, RC->getRegister(I)) <= Def; + }); Regs.insert(I, rx); } diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp index ee7683adbcdd..b425482e6adf 100644 --- a/lib/CodeGen/ExpandMemCmp.cpp +++ b/lib/CodeGen/ExpandMemCmp.cpp @@ -1,9 +1,8 @@ //===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -37,6 +36,14 @@ static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock( cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero.")); +static cl::opt<unsigned> MaxLoadsPerMemcmp( + "max-loads-per-memcmp", cl::Hidden, + cl::desc("Set maximum number of loads used in expanded memcmp")); + +static cl::opt<unsigned> MaxLoadsPerMemcmpOptSize( + "max-loads-per-memcmp-opt-size", cl::Hidden, + cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz")); + namespace { @@ -106,8 +113,7 @@ class MemCmpExpansion { public: MemCmpExpansion(CallInst *CI, uint64_t Size, const TargetTransformInfo::MemCmpExpansionOptions &Options, - unsigned MaxNumLoads, const bool IsUsedForZeroCmp, - unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout); + const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout); unsigned getNumBlocks(); uint64_t getNumLoads() const { return LoadSequence.size(); } @@ -196,16 +202,10 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size, MemCmpExpansion::MemCmpExpansion( CallInst *const CI, uint64_t Size, const TargetTransformInfo::MemCmpExpansionOptions &Options, - const unsigned MaxNumLoads, const bool IsUsedForZeroCmp, - const unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout) - : CI(CI), - Size(Size), - MaxLoadSize(0), - NumLoadsNonOneByte(0), - NumLoadsPerBlockForZeroCmp(MaxLoadsPerBlockForZeroCmp), - IsUsedForZeroCmp(IsUsedForZeroCmp), - DL(TheDataLayout), - Builder(CI) { + const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout) + : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0), + NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock), + IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) { assert(Size > 0 && "zero blocks"); // Scale the max size down if the target can load more bytes than we need. llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes); @@ -216,17 +216,17 @@ MemCmpExpansion::MemCmpExpansion( MaxLoadSize = LoadSizes.front(); // Compute the decomposition. unsigned GreedyNumLoadsNonOneByte = 0; - LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads, + LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads, GreedyNumLoadsNonOneByte); NumLoadsNonOneByte = GreedyNumLoadsNonOneByte; - assert(LoadSequence.size() <= MaxNumLoads && "broken invariant"); + assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant"); // If we allow overlapping loads and the load sequence is not already optimal, // use overlapping loads. 
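+  // E.g. for a 7-byte memcmp with a maximum load size of 4 bytes, the greedy
+  // sequence needs three loads (4 + 2 + 1) per source, while two overlapping
+  // 4-byte loads at offsets 0 and 3 cover the same bytes with one load fewer.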
if (Options.AllowOverlappingLoads && (LoadSequence.empty() || LoadSequence.size() > 2)) { unsigned OverlappingNumLoadsNonOneByte = 0; auto OverlappingLoads = computeOverlappingLoadSequence( - Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte); + Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte); if (!OverlappingLoads.empty() && (LoadSequence.empty() || OverlappingLoads.size() < LoadSequence.size())) { @@ -234,7 +234,7 @@ MemCmpExpansion::MemCmpExpansion( NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte; } } - assert(LoadSequence.size() <= MaxNumLoads && "broken invariant"); + assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant"); } unsigned MemCmpExpansion::getNumBlocks() { @@ -316,7 +316,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, assert(LoadIndex < getNumLoads() && "getCompareLoadPairs() called with no remaining loads"); std::vector<Value *> XorList, OrList; - Value *Diff; + Value *Diff = nullptr; const unsigned NumLoads = std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp); @@ -393,6 +393,8 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, while (OrList.size() != 1) { OrList = pairWiseOr(OrList); } + + assert(Diff && "Failed to find comparison diff"); Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); } @@ -722,7 +724,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, NumMemCmpCalls++; // Early exit from expansion if -Oz. - if (CI->getFunction()->optForMinSize()) + if (CI->getFunction()->hasMinSize()) return false; // Early exit from expansion if size is not a constant. @@ -739,18 +741,21 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, // TTI call to check if target would like to expand memcmp. Also, get the // available load sizes. const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); - const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp); + auto Options = TTI->enableMemCmpExpansion(CI->getFunction()->hasOptSize(), + IsUsedForZeroCmp); if (!Options) return false; - const unsigned MaxNumLoads = - TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize()); + if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()) + Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock; + + if (CI->getFunction()->hasOptSize() && + MaxLoadsPerMemcmpOptSize.getNumOccurrences()) + Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize; - unsigned NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences() - ? MemCmpEqZeroNumLoadsPerBlock - : TLI->getMemcmpEqZeroLoadsPerBlock(); + if (!CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmp.getNumOccurrences()) + Options.MaxNumLoads = MaxLoadsPerMemcmp; - MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads, - IsUsedForZeroCmp, NumLoadsPerBlock, *DL); + MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL); // Don't expand if this will require more loads than desired by the target. 
if (Expansion.getNumLoads() == 0) { @@ -824,7 +829,8 @@ bool ExpandMemCmpPass::runOnBlock( } LibFunc Func; if (TLI->getLibFunc(ImmutableCallSite(CI), Func) && - Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TL, &DL)) { + (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && + expandMemCmp(CI, TTI, TL, &DL)) { return true; } } diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index f2a2bcbb94b1..0ab70aff7dc4 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -1,9 +1,8 @@ //===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp index 7552ba8cd85d..1069a2423b8b 100644 --- a/lib/CodeGen/ExpandReductions.cpp +++ b/lib/CodeGen/ExpandReductions.cpp @@ -1,9 +1,8 @@ //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -30,9 +29,9 @@ namespace { unsigned getOpcode(Intrinsic::ID ID) { switch (ID) { - case Intrinsic::experimental_vector_reduce_fadd: + case Intrinsic::experimental_vector_reduce_v2_fadd: return Instruction::FAdd; - case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_v2_fmul: return Instruction::FMul; case Intrinsic::experimental_vector_reduce_add: return Instruction::Add; @@ -84,22 +83,33 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { Worklist.push_back(II); for (auto *II : Worklist) { + if (!TTI->shouldExpandReduction(II)) + continue; + + FastMathFlags FMF = + isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{}; + Intrinsic::ID ID = II->getIntrinsicID(); + RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID); + + Value *Rdx = nullptr; IRBuilder<> Builder(II); - bool IsOrdered = false; - Value *Acc = nullptr; - Value *Vec = nullptr; - auto ID = II->getIntrinsicID(); - auto MRK = RecurrenceDescriptor::MRK_Invalid; + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(FMF); switch (ID) { - case Intrinsic::experimental_vector_reduce_fadd: - case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: { // FMFs must be attached to the call, otherwise it's an ordered reduction // and it can't be handled by generating a shuffle sequence. 
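+      // E.g. (illustrative IR) a call such as
+      //   %r = call reassoc float
+      //     @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %acc,
+      //                                                        <4 x float> %v)
+      // can become a shuffle-based reduction of %v plus one scalar fadd with
+      // %acc, whereas the same call without 'reassoc' must be expanded as an
+      // ordered chain of fadds.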
- if (!II->getFastMathFlags().isFast()) - IsOrdered = true; - Acc = II->getArgOperand(0); - Vec = II->getArgOperand(1); - break; + Value *Acc = II->getArgOperand(0); + Value *Vec = II->getArgOperand(1); + if (!FMF.allowReassoc()) + Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK); + else { + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); + Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), + Acc, Rdx, "bin.rdx"); + } + } break; case Intrinsic::experimental_vector_reduce_add: case Intrinsic::experimental_vector_reduce_mul: case Intrinsic::experimental_vector_reduce_and: @@ -110,18 +120,13 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { case Intrinsic::experimental_vector_reduce_umax: case Intrinsic::experimental_vector_reduce_umin: case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: - Vec = II->getArgOperand(0); - MRK = getMRK(ID); - break; + case Intrinsic::experimental_vector_reduce_fmin: { + Value *Vec = II->getArgOperand(0); + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); + } break; default: continue; } - if (!TTI->shouldExpandReduction(II)) - continue; - Value *Rdx = - IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK) - : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); II->replaceAllUsesWith(Rdx); II->eraseFromParent(); Changed = true; diff --git a/lib/CodeGen/FEntryInserter.cpp b/lib/CodeGen/FEntryInserter.cpp index 4ddf9f92836c..a122f490884e 100644 --- a/lib/CodeGen/FEntryInserter.cpp +++ b/lib/CodeGen/FEntryInserter.cpp @@ -1,9 +1,8 @@ //===-- FEntryInsertion.cpp - Patchable prologues for LLVM -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/FaultMaps.cpp b/lib/CodeGen/FaultMaps.cpp index 361558a0e562..600f72d320eb 100644 --- a/lib/CodeGen/FaultMaps.cpp +++ b/lib/CodeGen/FaultMaps.cpp @@ -1,9 +1,8 @@ //===- FaultMaps.cpp ------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/FinalizeISel.cpp index ec586a2caea3..772d7f71bb37 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/FinalizeISel.cpp @@ -1,16 +1,16 @@ -//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===// +//===-- llvm/CodeGen/FinalizeISel.cpp ---------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // -// Expand Pseudo-instructions produced by ISel. These are usually to allow -// the expansion to contain control flow, such as a conditional move -// implemented with a conditional branch and a phi, or an atomic operation -// implemented with a loop. +/// This pass expands Pseudo-instructions produced by ISel, fixes register +/// reservations and may do machine frame information adjustments. +/// The pseudo instructions are used to allow the expansion to contain control +/// flow, such as a conditional move implemented with a conditional branch and a +/// phi, or an atomic operation implemented with a loop. // //===----------------------------------------------------------------------===// @@ -22,13 +22,13 @@ #include "llvm/Support/Debug.h" using namespace llvm; -#define DEBUG_TYPE "expand-isel-pseudos" +#define DEBUG_TYPE "finalize-isel" namespace { - class ExpandISelPseudos : public MachineFunctionPass { + class FinalizeISel : public MachineFunctionPass { public: static char ID; // Pass identification, replacement for typeid - ExpandISelPseudos() : MachineFunctionPass(ID) {} + FinalizeISel() : MachineFunctionPass(ID) {} private: bool runOnMachineFunction(MachineFunction &MF) override; @@ -39,12 +39,12 @@ namespace { }; } // end anonymous namespace -char ExpandISelPseudos::ID = 0; -char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID; -INITIALIZE_PASS(ExpandISelPseudos, DEBUG_TYPE, - "Expand ISel Pseudo-instructions", false, false) +char FinalizeISel::ID = 0; +char &llvm::FinalizeISelID = FinalizeISel::ID; +INITIALIZE_PASS(FinalizeISel, DEBUG_TYPE, + "Finalize ISel and expand pseudo-instructions", false, false) -bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { +bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); @@ -70,5 +70,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { } } + TLI->finalizeLowering(MF); + return Changed; } diff --git a/lib/CodeGen/FuncletLayout.cpp b/lib/CodeGen/FuncletLayout.cpp index 581cd423f2d4..75f6d0b8f0bf 100644 --- a/lib/CodeGen/FuncletLayout.cpp +++ b/lib/CodeGen/FuncletLayout.cpp @@ -1,9 +1,8 @@ //===-- FuncletLayout.cpp - Contiguously lay out funclets -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index 1c80556dfef5..9c53550eaa9d 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -1,9 +1,8 @@ //===-- GCMetadata.cpp - Garbage collector metadata -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp index bc7beb6f6c2d..500dba9aea37 100644 --- a/lib/CodeGen/GCMetadataPrinter.cpp +++ b/lib/CodeGen/GCMetadataPrinter.cpp @@ -1,9 +1,8 @@ //===- GCMetadataPrinter.cpp - Garbage collection infrastructure ----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index e8ccd84b0b93..90571d090bfb 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -1,9 +1,8 @@ //===-- GCRootLowering.cpp - Garbage collection infrastructure ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -214,7 +213,7 @@ bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) { } case Intrinsic::gcread: { // Replace a read barrier with a simple load. - Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); + Value *Ld = new LoadInst(CI->getType(), CI->getArgOperand(1), "", CI); Ld->takeName(CI); CI->replaceAllUsesWith(Ld); CI->eraseFromParent(); diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 6be4c16c6301..43d06b0f82e9 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -1,9 +1,8 @@ //===- GCStrategy.cpp - Garbage Collector Description ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/GlobalISel/CSEInfo.cpp b/lib/CodeGen/GlobalISel/CSEInfo.cpp index 89c525c5ba15..4518dbee1a9f 100644 --- a/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -1,9 +1,8 @@ //===- CSEInfo.cpp ------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -28,8 +27,8 @@ void UniqueMachineInstr::Profile(FoldingSetNodeID &ID) { } /// ----------------------------------------- -/// --------- CSEConfig ---------- /// -bool CSEConfig::shouldCSEOpc(unsigned Opc) { +/// --------- CSEConfigFull ---------- /// +bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { switch (Opc) { default: break; @@ -61,6 +60,17 @@ bool CSEConfig::shouldCSEOpc(unsigned Opc) { bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) { return Opc == TargetOpcode::G_CONSTANT; } + +std::unique_ptr<CSEConfigBase> +llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) { + std::unique_ptr<CSEConfigBase> Config; + if (Level == CodeGenOpt::None) + Config = make_unique<CSEConfigConstantOnly>(); + else + Config = make_unique<CSEConfigFull>(); + return Config; +} + /// ----------------------------------------- /// -------- GISelCSEInfo -------------// @@ -139,7 +149,7 @@ MachineInstr *GISelCSEInfo::getMachineInstrIfExists(FoldingSetNodeID &ID, void *&InsertPos) { handleRecordedInsts(); if (auto *Inst = getNodeIfExists(ID, MBB, InsertPos)) { - LLVM_DEBUG(dbgs() << "CSEInfo: Found Instr " << *Inst->MI << "\n";); + LLVM_DEBUG(dbgs() << "CSEInfo::Found Instr " << *Inst->MI;); return const_cast<MachineInstr *>(Inst->MI); } return nullptr; @@ -158,14 +168,14 @@ void GISelCSEInfo::countOpcodeHit(unsigned Opc) { void GISelCSEInfo::recordNewInstruction(MachineInstr *MI) { if (shouldCSE(MI->getOpcode())) { TemporaryInsts.insert(MI); - LLVM_DEBUG(dbgs() << "CSEInfo: Recording new MI" << *MI << "\n";); + LLVM_DEBUG(dbgs() << "CSEInfo::Recording new MI " << *MI); } } void GISelCSEInfo::handleRecordedInst(MachineInstr *MI) { assert(shouldCSE(MI->getOpcode()) && "Invalid instruction for CSE"); auto *UMI = InstrMapping.lookup(MI); - LLVM_DEBUG(dbgs() << "CSEInfo: Handling recorded MI" << *MI << "\n";); + LLVM_DEBUG(dbgs() << "CSEInfo::Handling recorded MI " << *MI); if (UMI) { // Invalidate this MI. 
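+    // The recorded MI has changed since it was profiled, so drop the stale
+    // UniqueMachineInstr from the CSE map before it can be looked up again.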
invalidateUniqueMachineInstr(UMI); @@ -224,14 +234,14 @@ void GISelCSEInfo::analyze(MachineFunction &MF) { for (MachineInstr &MI : MBB) { if (!shouldCSE(MI.getOpcode())) continue; - LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI << "\n";); + LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI); insertInstr(&MI); } } } void GISelCSEInfo::releaseMemory() { - // print(); + print(); CSEMap.clear(); InstrMapping.clear(); UniqueInstrAllocator.Reset(); @@ -245,11 +255,11 @@ void GISelCSEInfo::releaseMemory() { } void GISelCSEInfo::print() { -#ifndef NDEBUG - for (auto &It : OpcodeHitTable) { - dbgs() << "CSE Count for Opc " << It.first << " : " << It.second << "\n"; - }; -#endif + LLVM_DEBUG(for (auto &It + : OpcodeHitTable) { + dbgs() << "CSEInfo::CSE Hit for Opc " << It.first << " : " << It.second + << "\n"; + };); } /// ----------------------------------------- // ---- Profiling methods for FoldingSetNode --- // @@ -349,8 +359,9 @@ const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand( return *this; } -GISelCSEInfo &GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfig> CSEOpt, - bool Recompute) { +GISelCSEInfo & +GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfigBase> CSEOpt, + bool Recompute) { if (!AlreadyComputed || Recompute) { Info.setCSEConfig(std::move(CSEOpt)); Info.analyze(*MF); diff --git a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 863efe0c3e34..461bc6038c2c 100644 --- a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.cpp - MIBuilder--*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -40,6 +39,7 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID, MachineInstr *MI = CSEInfo->getMachineInstrIfExists(ID, CurMBB, NodeInsertPos); if (MI) { + CSEInfo->countOpcodeHit(MI->getOpcode()); auto CurrPos = getInsertPt(); if (!dominates(MI, CurrPos)) CurMBB->splice(CurrPos, CurMBB, MI); @@ -195,6 +195,12 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res, constexpr unsigned Opc = TargetOpcode::G_CONSTANT; if (!canPerformCSEForOpc(Opc)) return MachineIRBuilder::buildConstant(Res, Val); + + // For vectors, CSE the element only for now. + LLT Ty = Res.getLLTTy(*getMRI()); + if (Ty.isVector()) + return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val)); + FoldingSetNodeID ID; GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); void *InsertPos = nullptr; @@ -206,6 +212,7 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res, // Handle generating copies here. return generateCopiesIfRequired({Res}, MIB); } + MachineInstrBuilder NewMIB = MachineIRBuilder::buildConstant(Res, Val); return memoizeMI(NewMIB, InsertPos); } @@ -215,6 +222,12 @@ MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res, constexpr unsigned Opc = TargetOpcode::G_FCONSTANT; if (!canPerformCSEForOpc(Opc)) return MachineIRBuilder::buildFConstant(Res, Val); + + // For vectors, CSE the element only for now. 
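+  // E.g. a <4 x s32> G_FCONSTANT is emitted as one s32 G_FCONSTANT (CSE'd as
+  // usual) feeding a splat G_BUILD_VECTOR.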
+ LLT Ty = Res.getLLTTy(*getMRI()); + if (Ty.isVector()) + return buildSplatVector(Res, buildFConstant(Ty.getElementType(), Val)); + FoldingSetNodeID ID; GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); void *InsertPos = nullptr; diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp index 724ecedf3b3f..a5d8205a34a8 100644 --- a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -1,9 +1,8 @@ //===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// @@ -13,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -21,13 +21,17 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#define DEBUG_TYPE "call-lowering" + using namespace llvm; void CallLowering::anchor() {} -bool CallLowering::lowerCall( - MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg, - ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const { +bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, + ArrayRef<Register> ResRegs, + ArrayRef<ArrayRef<Register>> ArgRegs, + Register SwiftErrorVReg, + std::function<unsigned()> GetCalleeReg) const { auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout(); // First step is to marshall all the function's parameters into the correct @@ -40,8 +44,8 @@ bool CallLowering::lowerCall( ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS); - // We don't currently support swifterror or swiftself args. - if (OrigArg.Flags.isSwiftError() || OrigArg.Flags.isSwiftSelf()) + // We don't currently support swiftself args. + if (OrigArg.Flags.isSwiftSelf()) return false; OrigArgs.push_back(OrigArg); ++i; @@ -53,11 +57,12 @@ bool CallLowering::lowerCall( else Callee = MachineOperand::CreateReg(GetCalleeReg(), false); - ArgInfo OrigRet{ResReg, CS.getType(), ISD::ArgFlagsTy{}}; + ArgInfo OrigRet{ResRegs, CS.getType(), ISD::ArgFlagsTy{}}; if (!OrigRet.Ty->isVoidTy()) setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS); - return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs); + return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs, + SwiftErrorVReg); } template <typename FuncInfoTy> @@ -84,7 +89,10 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) { Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType(); - Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); + + auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType(); + Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); + // For ByVal, alignment should be passed from FE. 
BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; @@ -109,21 +117,78 @@ CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const CallInst &FuncInfo) const; +Register CallLowering::packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy, + MachineIRBuilder &MIRBuilder) const { + assert(SrcRegs.size() > 1 && "Nothing to pack"); + + const DataLayout &DL = MIRBuilder.getMF().getDataLayout(); + MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + + LLT PackedLLT = getLLTForType(*PackedTy, DL); + + SmallVector<LLT, 8> LLTs; + SmallVector<uint64_t, 8> Offsets; + computeValueLLTs(DL, *PackedTy, LLTs, &Offsets); + assert(LLTs.size() == SrcRegs.size() && "Regs / types mismatch"); + + Register Dst = MRI->createGenericVirtualRegister(PackedLLT); + MIRBuilder.buildUndef(Dst); + for (unsigned i = 0; i < SrcRegs.size(); ++i) { + Register NewDst = MRI->createGenericVirtualRegister(PackedLLT); + MIRBuilder.buildInsert(NewDst, Dst, SrcRegs[i], Offsets[i]); + Dst = NewDst; + } + + return Dst; +} + +void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg, + Type *PackedTy, + MachineIRBuilder &MIRBuilder) const { + assert(DstRegs.size() > 1 && "Nothing to unpack"); + + const DataLayout &DL = MIRBuilder.getMF().getDataLayout(); + + SmallVector<LLT, 8> LLTs; + SmallVector<uint64_t, 8> Offsets; + computeValueLLTs(DL, *PackedTy, LLTs, &Offsets); + assert(LLTs.size() == DstRegs.size() && "Regs / types mismatch"); + + for (unsigned i = 0; i < DstRegs.size(); ++i) + MIRBuilder.buildExtract(DstRegs[i], SrcReg, Offsets[i]); +} + bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); - const DataLayout &DL = F.getParent()->getDataLayout(); - SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); + return handleAssignments(CCInfo, ArgLocs, MIRBuilder, Args, Handler); +} + +bool CallLowering::handleAssignments(CCState &CCInfo, + SmallVectorImpl<CCValAssign> &ArgLocs, + MachineIRBuilder &MIRBuilder, + ArrayRef<ArgInfo> Args, + ValueHandler &Handler) const { + MachineFunction &MF = MIRBuilder.getMF(); + const Function &F = MF.getFunction(); + const DataLayout &DL = F.getParent()->getDataLayout(); unsigned NumArgs = Args.size(); for (unsigned i = 0; i != NumArgs; ++i) { MVT CurVT = MVT::getVT(Args[i].Ty); - if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) - return false; + if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) { + // Try to use the register type if we couldn't assign the VT. + if (!Handler.isArgumentHandler() || !CurVT.isValid()) + return false; + CurVT = TLI->getRegisterTypeForCallingConv( + F.getContext(), F.getCallingConv(), EVT(CurVT)); + if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) + return false; + } } for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) { @@ -137,16 +202,49 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, continue; } - if (VA.isRegLoc()) - Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA); - else if (VA.isMemLoc()) { - unsigned Size = VA.getValVT() == MVT::iPTR - ? 
DL.getPointerSize() - : alignTo(VA.getValVT().getSizeInBits(), 8) / 8; + assert(Args[i].Regs.size() == 1 && + "Can't handle multiple virtual regs yet"); + + // FIXME: Pack registers if we have more than one. + Register ArgReg = Args[i].Regs[0]; + + if (VA.isRegLoc()) { + MVT OrigVT = MVT::getVT(Args[i].Ty); + MVT VAVT = VA.getValVT(); + if (Handler.isArgumentHandler() && VAVT != OrigVT) { + if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) + return false; // Can't handle this type of arg yet. + const LLT VATy(VAVT); + Register NewReg = + MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); + Handler.assignValueToReg(NewReg, VA.getLocReg(), VA); + // If it's a vector type, we either need to truncate the elements + // or do an unmerge to get the lower block of elements. + if (VATy.isVector() && + VATy.getNumElements() > OrigVT.getVectorNumElements()) { + const LLT OrigTy(OrigVT); + // Just handle the case where the VA type is 2 * original type. + if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) { + LLVM_DEBUG(dbgs() + << "Incoming promoted vector arg has too many elts"); + return false; + } + auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg}); + MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0)); + } else { + MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); + } + } else { + Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); + } + } else if (VA.isMemLoc()) { + MVT VT = MVT::getVT(Args[i].Ty); + unsigned Size = VT == MVT::iPTR ? DL.getPointerSize() + : alignTo(VT.getSizeInBits(), 8) / 8; unsigned Offset = VA.getLocMemOffset(); MachinePointerInfo MPO; - unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO); - Handler.assignValueToAddress(Args[i].Reg, StackAddr, Size, MPO, VA); + Register StackAddr = Handler.getStackAddress(Size, Offset, MPO); + Handler.assignValueToAddress(ArgReg, StackAddr, Size, MPO, VA); } else { // FIXME: Support byvals and other weirdness return false; @@ -155,9 +253,11 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, return true; } -unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg, +Register CallLowering::ValueHandler::extendRegister(Register ValReg, CCValAssign &VA) { LLT LocTy{VA.getLocVT()}; + if (LocTy.getSizeInBits() == MRI.getType(ValReg).getSizeInBits()) + return ValReg; switch (VA.getLocInfo()) { default: break; case CCValAssign::Full: @@ -170,12 +270,12 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg, return MIB->getOperand(0).getReg(); } case CCValAssign::SExt: { - unsigned NewReg = MRI.createGenericVirtualRegister(LocTy); + Register NewReg = MRI.createGenericVirtualRegister(LocTy); MIRBuilder.buildSExt(NewReg, ValReg); return NewReg; } case CCValAssign::ZExt: { - unsigned NewReg = MRI.createGenericVirtualRegister(LocTy); + Register NewReg = MRI.createGenericVirtualRegister(LocTy); MIRBuilder.buildZExt(NewReg, ValReg); return NewReg; } diff --git a/lib/CodeGen/GlobalISel/Combiner.cpp b/lib/CodeGen/GlobalISel/Combiner.cpp index 45b0e36fd7d9..31cb1dbbc9b5 100644 --- a/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/lib/CodeGen/GlobalISel/Combiner.cpp @@ -1,9 +1,8 @@ //===-- lib/CodeGen/GlobalISel/Combiner.cpp -------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -51,7 +50,7 @@ public: } void erasingInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << "Erased: " << MI << "\n"); + LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n"); WorkList.remove(&MI); } void createdInstr(MachineInstr &MI) override { @@ -130,9 +129,10 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, CurMI->eraseFromParentAndMarkDBGValuesForRemoval(); continue; } - WorkList.insert(CurMI); + WorkList.deferred_insert(CurMI); } } + WorkList.finalize(); // Main Loop. Process the instructions here. while (!WorkList.empty()) { MachineInstr *CurrInst = WorkList.pop_back_val(); diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp index b1c5670a6dec..9cbf3dd83ff1 100644 --- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1,9 +1,8 @@ //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" @@ -23,8 +22,8 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B) : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {} -void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg, - unsigned ToReg) const { +void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, + Register ToReg) const { Observer.changingAllUsesOfReg(MRI, FromReg); if (MRI.constrainRegAttrs(ToReg, FromReg)) @@ -37,7 +36,7 @@ void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg, void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, - unsigned ToReg) const { + Register ToReg) const { assert(FromRegOp.getParent() && "Expected an operand in an MI"); Observer.changingInstr(*FromRegOp.getParent()); @@ -47,6 +46,13 @@ void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI, } bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { + if (matchCombineCopy(MI)) { + applyCombineCopy(MI); + return true; + } + return false; +} +bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { if (MI.getOpcode() != TargetOpcode::COPY) return false; unsigned DstReg = MI.getOperand(0).getReg(); @@ -55,20 +61,18 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { LLT SrcTy = MRI.getType(SrcReg); // Simple Copy Propagation. // a(sx) = COPY b(sx) -> Replace all uses of a with b. - if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) { - MI.eraseFromParent(); - replaceRegWith(MRI, DstReg, SrcReg); + if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) return true; - } return false; } +void CombinerHelper::applyCombineCopy(MachineInstr &MI) { + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, SrcReg); +} namespace { -struct PreferredTuple { - LLT Ty; // The result type of the extend. 
- unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT - MachineInstr *MI; -}; /// Select a preference between two uses. CurrentUse is the current preference /// while *ForCandidate is attributes of the candidate under consideration. @@ -127,7 +131,8 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse, /// want to try harder to find a dominating block. static void InsertInsnsWithoutSideEffectsBeforeUse( MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO, - std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator)> + std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator, + MachineOperand &UseMO)> Inserter) { MachineInstr &UseMI = *UseMO.getParent(); @@ -143,26 +148,26 @@ static void InsertInsnsWithoutSideEffectsBeforeUse( // the def instead of at the start of the block. if (InsertBB == DefMI.getParent()) { MachineBasicBlock::iterator InsertPt = &DefMI; - Inserter(InsertBB, std::next(InsertPt)); + Inserter(InsertBB, std::next(InsertPt), UseMO); return; } // Otherwise we want the start of the BB - Inserter(InsertBB, InsertBB->getFirstNonPHI()); + Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO); } } // end anonymous namespace bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { - struct InsertionPoint { - MachineOperand *UseMO; - MachineBasicBlock *InsertIntoBB; - MachineBasicBlock::iterator InsertBefore; - InsertionPoint(MachineOperand *UseMO, MachineBasicBlock *InsertIntoBB, - MachineBasicBlock::iterator InsertBefore) - : UseMO(UseMO), InsertIntoBB(InsertIntoBB), InsertBefore(InsertBefore) { - } - }; + PreferredTuple Preferred; + if (matchCombineExtendingLoads(MI, Preferred)) { + applyCombineExtendingLoads(MI, Preferred); + return true; + } + return false; +} +bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, + PreferredTuple &Preferred) { // We match the loads and follow the uses to the extend instead of matching // the extends and following the def to the load. This is because the load // must remain in the same position for correctness (unless we also add code @@ -182,6 +187,19 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { if (!LoadValueTy.isScalar()) return false; + // Most architectures are going to legalize <s8 loads into at least a 1 byte + // load, and the MMOs can only describe memory accesses in multiples of bytes. + // If we try to perform extload combining on those, we can end up with + // %a(s8) = extload %ptr (load 1 byte from %ptr) + // ... which is an illegal extload instruction. + if (LoadValueTy.getSizeInBits() < 8) + return false; + + // For non power-of-2 types, they will very likely be legalized into multiple + // loads. Don't bother trying to match them into extending loads. + if (!isPowerOf2_32(LoadValueTy.getSizeInBits())) + return false; + // Find the preferred type aside from the any-extends (unless it's the only // one) and non-extending ops. We'll emit an extending load to that type and // and emit a variant of (extend (trunc X)) for the others according to the @@ -192,7 +210,7 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { : MI.getOpcode() == TargetOpcode::G_SEXTLOAD ? 
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; - PreferredTuple Preferred = {LLT(), PreferredOpcode, nullptr}; + Preferred = {LLT(), PreferredOpcode, nullptr}; for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) { if (UseMI.getOpcode() == TargetOpcode::G_SEXT || UseMI.getOpcode() == TargetOpcode::G_ZEXT || @@ -211,9 +229,35 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { assert(Preferred.Ty != LoadValueTy && "Extending to same type?"); LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI); + return true; +} +void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, + PreferredTuple &Preferred) { // Rewrite the load to the chosen extending load. - unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg(); + Register ChosenDstReg = Preferred.MI->getOperand(0).getReg(); + + // Inserter to insert a truncate back to the original type at a given point + // with some basic CSE to limit truncate duplication to one per BB. + DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns; + auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB, + MachineBasicBlock::iterator InsertBefore, + MachineOperand &UseMO) { + MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB); + if (PreviouslyEmitted) { + Observer.changingInstr(*UseMO.getParent()); + UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg()); + Observer.changedInstr(*UseMO.getParent()); + return; + } + + Builder.setInsertPt(*InsertIntoBB, InsertBefore); + Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg()); + MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg); + EmittedInsns[InsertIntoBB] = NewMI; + replaceRegOpWith(MRI, UseMO, NewDstReg); + }; + Observer.changingInstr(MI); MI.setDesc( Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT @@ -223,10 +267,13 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { : TargetOpcode::G_LOAD)); // Rewrite all the uses to fix up the types. - SmallVector<MachineInstr *, 1> ScheduleForErase; - SmallVector<InsertionPoint, 4> ScheduleForInsert; - for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) { - MachineInstr *UseMI = UseMO.getParent(); + auto &LoadValue = MI.getOperand(0); + SmallVector<MachineOperand *, 4> Uses; + for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) + Uses.push_back(&UseMO); + + for (auto *UseMO : Uses) { + MachineInstr *UseMI = UseMO->getParent(); // If the extend is compatible with the preferred extend then we should fix // up the type and extend so that it uses the preferred use. @@ -247,7 +294,8 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { // %2:_(s32) = G_SEXTLOAD ... // ... = ... %2(s32) replaceRegWith(MRI, UseDstReg, ChosenDstReg); - ScheduleForErase.push_back(UseMO.getParent()); + Observer.erasingInstr(*UseMO->getParent()); + UseMO->getParent()->eraseFromParent(); } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) { // If the preferred size is smaller, then keep the extend but extend // from the result of the extending load. For example: @@ -272,59 +320,87 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { // %4:_(s8) = G_TRUNC %2:_(s32) // %3:_(s64) = G_ZEXT %2:_(s8) // ... = ... 
%3(s64)
-    InsertInsnsWithoutSideEffectsBeforeUse(
-        Builder, MI, UseMO,
-        [&](MachineBasicBlock *InsertIntoBB,
-            MachineBasicBlock::iterator InsertBefore) {
-          ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
-        });
+        InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
+                                               InsertTruncAt);
       }
       continue;
     }

     // The use is (one of) the uses of the preferred use we chose earlier.
     // We're going to update the load to def this value later so just erase
     // the old extend.
-    ScheduleForErase.push_back(UseMO.getParent());
+    Observer.erasingInstr(*UseMO->getParent());
+    UseMO->getParent()->eraseFromParent();
     continue;
   }

   // The use isn't an extend. Truncate back to the type we originally loaded.
   // This is free on many targets.
-  InsertInsnsWithoutSideEffectsBeforeUse(
-      Builder, MI, UseMO,
-      [&](MachineBasicBlock *InsertIntoBB,
-          MachineBasicBlock::iterator InsertBefore) {
-        ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
-      });
+  InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
 }

-  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
-  for (auto &InsertionInfo : ScheduleForInsert) {
-    MachineOperand *UseMO = InsertionInfo.UseMO;
-    MachineBasicBlock *InsertIntoBB = InsertionInfo.InsertIntoBB;
-    MachineBasicBlock::iterator InsertBefore = InsertionInfo.InsertBefore;
-
-    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
-    if (PreviouslyEmitted) {
-      Observer.changingInstr(*UseMO->getParent());
-      UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
-      Observer.changedInstr(*UseMO->getParent());
-      continue;
-    }
-
-    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
-    unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
-    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
-    EmittedInsns[InsertIntoBB] = NewMI;
-    replaceRegOpWith(MRI, *UseMO, NewDstReg);
-  }
-  for (auto &EraseMI : ScheduleForErase) {
-    Observer.erasingInstr(*EraseMI);
-    EraseMI->eraseFromParent();
-  }
   MI.getOperand(0).setReg(ChosenDstReg);
   Observer.changedInstr(MI);
+}
+
+bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
+  // Try to match the following:
+  // bb1:
+  //   %c(s32) = G_ICMP pred, %a, %b
+  //   %c1(s1) = G_TRUNC %c(s32)
+  //   G_BRCOND %c1, %bb2
+  //   G_BR %bb3
+  // bb2:
+  // ...
+  // bb3:
+
+  // The above pattern does not have a fall through to the successor bb2, so it
+  // always results in a branch no matter which path is taken. Here we try to
+  // find and replace that pattern with a conditional branch to bb3, falling
+  // through to bb2 otherwise.
+
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineBasicBlock::iterator BrIt(MI);
+  if (BrIt == MBB->begin())
+    return false;
+  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
+
+  MachineInstr *BrCond = &*std::prev(BrIt);
+  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
+    return false;
+  // Check that the next block is the conditional branch target.
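
Concretely, the rewrite that this match/apply pair performs looks as follows on a small instance (an illustrative sketch using the comment's bb1/bb2/bb3 layout and an eq compare; the register names are hypothetical):

    Before the combine:
      %c(s32) = G_ICMP intpred(eq), %a, %b
      %c1(s1) = G_TRUNC %c(s32)
      G_BRCOND %c1(s1), %bb2
      G_BR %bb3

    After tryCombineBr:
      %c(s32) = G_ICMP intpred(ne), %a, %b
      %c1(s1) = G_TRUNC %c(s32)
      G_BRCOND %c1(s1), %bb3
      ; bb2 is now reached by falling through

The compare's predicate is inverted in place, the G_BRCOND is retargeted at the old G_BR destination, and the unconditional branch is erased.
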
+ if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB())) + return false; + + MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg()); + if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP || + !MRI.hasOneUse(CmpMI->getOperand(0).getReg())) + return false; + return true; +} + +bool CombinerHelper::tryCombineBr(MachineInstr &MI) { + if (!matchCombineBr(MI)) + return false; + MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); + MachineBasicBlock::iterator BrIt(MI); + MachineInstr *BrCond = &*std::prev(BrIt); + MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg()); + + CmpInst::Predicate InversePred = CmpInst::getInversePredicate( + (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate()); + + // Invert the G_ICMP condition. + Observer.changingInstr(*CmpMI); + CmpMI->getOperand(1).setPredicate(InversePred); + Observer.changedInstr(*CmpMI); + + // Change the conditional branch target. + Observer.changingInstr(*BrCond); + BrCond->getOperand(1).setMBB(BrTarget); + Observer.changedInstr(*BrCond); + MI.eraseFromParent(); return true; } diff --git a/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp index c693acbbf10b..62b903c30b89 100644 --- a/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp +++ b/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp @@ -1,9 +1,8 @@ //===-- lib/CodeGen/GlobalISel/GISelChangeObserver.cpp --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -27,6 +26,7 @@ void GISelChangeObserver::changingAllUsesOfReg( void GISelChangeObserver::finishedChangingAllUsesOfReg() { for (auto *ChangedMI : ChangingAllUsesOfReg) changedInstr(*ChangedMI); + ChangingAllUsesOfReg.clear(); } RAIIDelegateInstaller::RAIIDelegateInstaller(MachineFunction &MF, diff --git a/lib/CodeGen/GlobalISel/GlobalISel.cpp b/lib/CodeGen/GlobalISel/GlobalISel.cpp index 00c6a9d63158..e0391e6f6467 100644 --- a/lib/CodeGen/GlobalISel/GlobalISel.cpp +++ b/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/GlobalISel/GlobalIsel.cpp --- GlobalISel ----*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 95f6274aa068..6e99bdbd8264 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -16,8 +15,11 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/LowLevelType.h" @@ -106,9 +108,7 @@ static void reportTranslationError(MachineFunction &MF, ORE.emit(R); } -IRTranslator::IRTranslator() : MachineFunctionPass(ID) { - initializeIRTranslatorPass(*PassRegistry::getPassRegistry()); -} +IRTranslator::IRTranslator() : MachineFunctionPass(ID) { } #ifndef NDEBUG namespace { @@ -136,7 +136,11 @@ public: LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst << " was copied to " << MI); #endif - assert(CurrInst->getDebugLoc() == MI.getDebugLoc() && + // We allow insts in the entry block to have a debug loc line of 0 because + // they could have originated from constants, and we don't want a jumpy + // debug experience. + assert((CurrInst->getDebugLoc() == MI.getDebugLoc() || + MI.getDebugLoc().getLine() == 0) && "Line info was not transferred to all instructions"); } }; @@ -152,36 +156,6 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -static void computeValueLLTs(const DataLayout &DL, Type &Ty, - SmallVectorImpl<LLT> &ValueTys, - SmallVectorImpl<uint64_t> *Offsets = nullptr, - uint64_t StartingOffset = 0) { - // Given a struct type, recursively traverse the elements. - if (StructType *STy = dyn_cast<StructType>(&Ty)) { - const StructLayout *SL = DL.getStructLayout(STy); - for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) - computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets, - StartingOffset + SL->getElementOffset(I)); - return; - } - // Given an array type, recursively traverse the elements. - if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) { - Type *EltTy = ATy->getElementType(); - uint64_t EltSize = DL.getTypeAllocSize(EltTy); - for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) - computeValueLLTs(DL, *EltTy, ValueTys, Offsets, - StartingOffset + i * EltSize); - return; - } - // Interpret void as zero return values. - if (Ty.isVoidTy()) - return; - // Base case: we can get an LLT for this LLVM IR type. 
- ValueTys.push_back(getLLTForType(Ty, DL)); - if (Offsets != nullptr) - Offsets->push_back(StartingOffset * 8); -} - IRTranslator::ValueToVRegInfo::VRegListT & IRTranslator::allocateVRegs(const Value &Val) { assert(!VMap.contains(Val) && "Value already allocated in VMap"); @@ -195,7 +169,7 @@ IRTranslator::allocateVRegs(const Value &Val) { return *Regs; } -ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) { +ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) { auto VRegsIt = VMap.findVRegs(Val); if (VRegsIt != VMap.vregs_end()) return *VRegsIt->second; @@ -249,7 +223,7 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { if (FrameIndices.find(&AI) != FrameIndices.end()) return FrameIndices[&AI]; - unsigned ElementSize = DL->getTypeStoreSize(AI.getAllocatedType()); + unsigned ElementSize = DL->getTypeAllocSize(AI.getAllocatedType()); unsigned Size = ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue(); @@ -311,21 +285,20 @@ void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) { bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U, MachineIRBuilder &MIRBuilder) { - // FIXME: handle signed/unsigned wrapping flags. - // Get or create a virtual register for each value. // Unless the value is a Constant => loadimm cst? // or inline constant each time? // Creation of a virtual register needs to have a size. - unsigned Op0 = getOrCreateVReg(*U.getOperand(0)); - unsigned Op1 = getOrCreateVReg(*U.getOperand(1)); - unsigned Res = getOrCreateVReg(U); - auto FBinOp = MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1); + Register Op0 = getOrCreateVReg(*U.getOperand(0)); + Register Op1 = getOrCreateVReg(*U.getOperand(1)); + Register Res = getOrCreateVReg(U); + uint16_t Flags = 0; if (isa<Instruction>(U)) { - MachineInstr *FBinOpMI = FBinOp.getInstr(); const Instruction &I = cast<Instruction>(U); - FBinOpMI->copyIRFlags(I); + Flags = MachineInstr::copyFlagsFromInstruction(I); } + + MIRBuilder.buildInstr(Opcode, {Res}, {Op0, Op1}, Flags); return true; } @@ -333,27 +306,38 @@ bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) { // -0.0 - X --> G_FNEG if (isa<Constant>(U.getOperand(0)) && U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) { - MIRBuilder.buildInstr(TargetOpcode::G_FNEG) - .addDef(getOrCreateVReg(U)) - .addUse(getOrCreateVReg(*U.getOperand(1))); + Register Op1 = getOrCreateVReg(*U.getOperand(1)); + Register Res = getOrCreateVReg(U); + uint16_t Flags = 0; + if (isa<Instruction>(U)) { + const Instruction &I = cast<Instruction>(U); + Flags = MachineInstr::copyFlagsFromInstruction(I); + } + // Negate the last operand of the FSUB + MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op1}, Flags); return true; } return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder); } bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { - MIRBuilder.buildInstr(TargetOpcode::G_FNEG) - .addDef(getOrCreateVReg(U)) - .addUse(getOrCreateVReg(*U.getOperand(1))); + Register Op0 = getOrCreateVReg(*U.getOperand(0)); + Register Res = getOrCreateVReg(U); + uint16_t Flags = 0; + if (isa<Instruction>(U)) { + const Instruction &I = cast<Instruction>(U); + Flags = MachineInstr::copyFlagsFromInstruction(I); + } + MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op0}, Flags); return true; } bool IRTranslator::translateCompare(const User &U, MachineIRBuilder &MIRBuilder) { const CmpInst *CI = dyn_cast<CmpInst>(&U); - unsigned Op0 = 
getOrCreateVReg(*U.getOperand(0)); - unsigned Op1 = getOrCreateVReg(*U.getOperand(1)); - unsigned Res = getOrCreateVReg(U); + Register Op0 = getOrCreateVReg(*U.getOperand(0)); + Register Op1 = getOrCreateVReg(*U.getOperand(1)); + Register Res = getOrCreateVReg(U); CmpInst::Predicate Pred = CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>( cast<ConstantExpr>(U).getPredicate()); @@ -366,8 +350,8 @@ bool IRTranslator::translateCompare(const User &U, MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType()))); else { - auto FCmp = MIRBuilder.buildFCmp(Pred, Res, Op0, Op1); - FCmp->copyIRFlags(*CI); + MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1}, + MachineInstr::copyFlagsFromInstruction(*CI)); } return true; @@ -379,15 +363,20 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0) Ret = nullptr; - ArrayRef<unsigned> VRegs; + ArrayRef<Register> VRegs; if (Ret) VRegs = getOrCreateVRegs(*Ret); + Register SwiftErrorVReg = 0; + if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) { + SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt( + &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg()); + } + // The target may mess up with the insertion point, but // this is not important as a return is the last instruction // of the block anyway. - - return CLI->lowerReturn(MIRBuilder, Ret, VRegs); + return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg); } bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { @@ -395,7 +384,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { unsigned Succ = 0; if (!BrInst.isUnconditional()) { // We want a G_BRCOND to the true BB followed by an unconditional branch. - unsigned Tst = getOrCreateVReg(*BrInst.getCondition()); + Register Tst = getOrCreateVReg(*BrInst.getCondition()); const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++)); MachineBasicBlock &TrueBB = getMBB(TrueTgt); MIRBuilder.buildBrCond(Tst, TrueBB); @@ -415,48 +404,429 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { return true; } -bool IRTranslator::translateSwitch(const User &U, - MachineIRBuilder &MIRBuilder) { - // For now, just translate as a chain of conditional branches. - // FIXME: could we share most of the logic/code in - // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel? - // At first sight, it seems most of the logic in there is independent of - // SelectionDAG-specifics and a lot of work went in to optimize switch - // lowering in there. 
- - const SwitchInst &SwInst = cast<SwitchInst>(U); - const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition()); - const BasicBlock *OrigBB = SwInst.getParent(); - - LLT LLTi1 = getLLTForType(*Type::getInt1Ty(U.getContext()), *DL); - for (auto &CaseIt : SwInst.cases()) { - const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue()); - const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1); - MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue); - MachineBasicBlock &CurMBB = MIRBuilder.getMBB(); - const BasicBlock *TrueBB = CaseIt.getCaseSuccessor(); - MachineBasicBlock &TrueMBB = getMBB(*TrueBB); - - MIRBuilder.buildBrCond(Tst, TrueMBB); - CurMBB.addSuccessor(&TrueMBB); - addMachineCFGPred({OrigBB, TrueBB}, &CurMBB); - - MachineBasicBlock *FalseMBB = - MF->CreateMachineBasicBlock(SwInst.getParent()); - // Insert the comparison blocks one after the other. - MF->insert(std::next(CurMBB.getIterator()), FalseMBB); - MIRBuilder.buildBr(*FalseMBB); - CurMBB.addSuccessor(FalseMBB); - - MIRBuilder.setMBB(*FalseMBB); - } - // handle default case - const BasicBlock *DefaultBB = SwInst.getDefaultDest(); - MachineBasicBlock &DefaultMBB = getMBB(*DefaultBB); - MIRBuilder.buildBr(DefaultMBB); - MachineBasicBlock &CurMBB = MIRBuilder.getMBB(); - CurMBB.addSuccessor(&DefaultMBB); - addMachineCFGPred({OrigBB, DefaultBB}, &CurMBB); +void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src, + MachineBasicBlock *Dst, + BranchProbability Prob) { + if (!FuncInfo.BPI) { + Src->addSuccessorWithoutProb(Dst); + return; + } + if (Prob.isUnknown()) + Prob = getEdgeProbability(Src, Dst); + Src->addSuccessor(Dst, Prob); +} + +BranchProbability +IRTranslator::getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + const BasicBlock *SrcBB = Src->getBasicBlock(); + const BasicBlock *DstBB = Dst->getBasicBlock(); + if (!FuncInfo.BPI) { + // If BPI is not available, set the default probability as 1 / N, where N is + // the number of successors. + auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1); + return BranchProbability(1, SuccSize); + } + return FuncInfo.BPI->getEdgeProbability(SrcBB, DstBB); +} + +bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) { + using namespace SwitchCG; + // Extract cases from the switch. + const SwitchInst &SI = cast<SwitchInst>(U); + BranchProbabilityInfo *BPI = FuncInfo.BPI; + CaseClusterVector Clusters; + Clusters.reserve(SI.getNumCases()); + for (auto &I : SI.cases()) { + MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor()); + assert(Succ && "Could not find successor mbb in mapping"); + const ConstantInt *CaseVal = I.getCaseValue(); + BranchProbability Prob = + BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex()) + : BranchProbability(1, SI.getNumCases() + 1); + Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob)); + } + + MachineBasicBlock *DefaultMBB = &getMBB(*SI.getDefaultDest()); + + // Cluster adjacent cases with the same destination. We do this at all + // optimization levels because it's cheap to do and will make codegen faster + // if there are many clusters. + sortAndRangeify(Clusters); + + MachineBasicBlock *SwitchMBB = &getMBB(*SI.getParent()); + + // If there is only the default destination, jump there directly. 
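
sortAndRangeify, called just above, is defined outside this patch; the following is a standalone sketch of the clustering it performs, with hypothetical POD stand-ins for CaseCluster (real clusters also carry branch probabilities and MBB pointers):

    #include <algorithm>
    #include <cstdint>
    #include <utility>
    #include <vector>

    struct Range { int64_t Lo, Hi; int Dest; };

    // Fold sorted case values that are adjacent and share a destination into
    // a single [Lo, Hi] cluster.
    std::vector<Range> clusterCases(std::vector<std::pair<int64_t, int>> Cases) {
      std::sort(Cases.begin(), Cases.end());
      std::vector<Range> Out;
      for (auto [Val, Dest] : Cases) {
        if (!Out.empty() && Out.back().Dest == Dest && Out.back().Hi + 1 == Val)
          Out.back().Hi = Val; // extend the previous cluster
        else
          Out.push_back({Val, Val, Dest});
      }
      return Out;
    }

For example, cases 0, 1, 2 -> bbA and 3 -> bbB collapse into the two clusters [0,2] -> bbA and [3,3] -> bbB.
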
+ if (Clusters.empty()) { + SwitchMBB->addSuccessor(DefaultMBB); + if (DefaultMBB != SwitchMBB->getNextNode()) + MIB.buildBr(*DefaultMBB); + return true; + } + + SL->findJumpTables(Clusters, &SI, DefaultMBB); + + LLVM_DEBUG({ + dbgs() << "Case clusters: "; + for (const CaseCluster &C : Clusters) { + if (C.Kind == CC_JumpTable) + dbgs() << "JT:"; + if (C.Kind == CC_BitTests) + dbgs() << "BT:"; + + C.Low->getValue().print(dbgs(), true); + if (C.Low != C.High) { + dbgs() << '-'; + C.High->getValue().print(dbgs(), true); + } + dbgs() << ' '; + } + dbgs() << '\n'; + }); + + assert(!Clusters.empty()); + SwitchWorkList WorkList; + CaseClusterIt First = Clusters.begin(); + CaseClusterIt Last = Clusters.end() - 1; + auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); + + // FIXME: At the moment we don't do any splitting optimizations here like + // SelectionDAG does, so this worklist only has one entry. + while (!WorkList.empty()) { + SwitchWorkListItem W = WorkList.back(); + WorkList.pop_back(); + if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB)) + return false; + } + return true; +} + +void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT, + MachineBasicBlock *MBB) { + // Emit the code for the jump table + assert(JT.Reg != -1U && "Should lower JT Header first!"); + MachineIRBuilder MIB(*MBB->getParent()); + MIB.setMBB(*MBB); + MIB.setDebugLoc(CurBuilder->getDebugLoc()); + + Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext()); + const LLT PtrTy = getLLTForType(*PtrIRTy, *DL); + + auto Table = MIB.buildJumpTable(PtrTy, JT.JTI); + MIB.buildBrJT(Table.getReg(0), JT.JTI, JT.Reg); +} + +bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT, + SwitchCG::JumpTableHeader &JTH, + MachineBasicBlock *HeaderBB) { + MachineIRBuilder MIB(*HeaderBB->getParent()); + MIB.setMBB(*HeaderBB); + MIB.setDebugLoc(CurBuilder->getDebugLoc()); + + const Value &SValue = *JTH.SValue; + // Subtract the lowest switch case value from the value being switched on. + const LLT SwitchTy = getLLTForType(*SValue.getType(), *DL); + Register SwitchOpReg = getOrCreateVReg(SValue); + auto FirstCst = MIB.buildConstant(SwitchTy, JTH.First); + auto Sub = MIB.buildSub({SwitchTy}, SwitchOpReg, FirstCst); + + // This value may be smaller or larger than the target's pointer type, and + // therefore require extension or truncating. + Type *PtrIRTy = SValue.getType()->getPointerTo(); + const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy)); + Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub); + + JT.Reg = Sub.getReg(0); + + if (JTH.OmitRangeCheck) { + if (JT.MBB != HeaderBB->getNextNode()) + MIB.buildBr(*JT.MBB); + return true; + } + + // Emit the range check for the jump table, and branch to the default block + // for the switch statement if the value being switched on exceeds the + // largest case in the switch. + auto Cst = getOrCreateVReg( + *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First)); + Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0); + auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst); + + auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default); + + // Avoid emitting unnecessary branches to the next block. 
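
As a standalone model of the dispatch that emitJumpTable and the header code above produce together (plain C++; the function and parameter names are hypothetical, not LLVM API):

    #include <cstdint>

    // Handle case values First..Last through a table. The single unsigned
    // compare is the ICMP_UGT range check built above: after the subtraction,
    // values below First wrap around past Last - First.
    int jumpTableDispatch(uint64_t X, uint64_t First, uint64_t Last,
                          int (*Table[])(), int (*Default)()) {
      uint64_t Idx = X - First; // G_SUB, then zext/trunc to pointer width
      if (Idx > Last - First)   // skipped when JTH.OmitRangeCheck is set
        return Default();
      return Table[Idx]();      // G_BRJT through the jump table
    }
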
+ if (JT.MBB != HeaderBB->getNextNode()) + BrCond = MIB.buildBr(*JT.MBB); + return true; +} + +void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, + MachineBasicBlock *SwitchBB, + MachineIRBuilder &MIB) { + Register CondLHS = getOrCreateVReg(*CB.CmpLHS); + Register Cond; + DebugLoc OldDbgLoc = MIB.getDebugLoc(); + MIB.setDebugLoc(CB.DbgLoc); + MIB.setMBB(*CB.ThisBB); + + if (CB.PredInfo.NoCmp) { + // Branch or fall through to TrueBB. + addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb); + addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()}, + CB.ThisBB); + CB.ThisBB->normalizeSuccProbs(); + if (CB.TrueBB != CB.ThisBB->getNextNode()) + MIB.buildBr(*CB.TrueBB); + MIB.setDebugLoc(OldDbgLoc); + return; + } + + const LLT i1Ty = LLT::scalar(1); + // Build the compare. + if (!CB.CmpMHS) { + Register CondRHS = getOrCreateVReg(*CB.CmpRHS); + Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); + } else { + assert(CB.PredInfo.Pred == CmpInst::ICMP_ULE && + "Can only handle ULE ranges"); + + const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); + const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); + + Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS); + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { + Register CondRHS = getOrCreateVReg(*CB.CmpRHS); + Cond = + MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, CmpOpReg, CondRHS).getReg(0); + } else { + const LLT &CmpTy = MRI->getType(CmpOpReg); + auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS); + auto Diff = MIB.buildConstant(CmpTy, High - Low); + Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0); + } + } + + // Update successor info + addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb); + + addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()}, + CB.ThisBB); + + // TrueBB and FalseBB are always different unless the incoming IR is + // degenerate. This only happens when running llc on weird IR. + if (CB.TrueBB != CB.FalseBB) + addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb); + CB.ThisBB->normalizeSuccProbs(); + + // if (SwitchBB->getBasicBlock() != CB.FalseBB->getBasicBlock()) + addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()}, + CB.ThisBB); + + // If the lhs block is the next block, invert the condition so that we can + // fall through to the lhs instead of the rhs block. + if (CB.TrueBB == CB.ThisBB->getNextNode()) { + std::swap(CB.TrueBB, CB.FalseBB); + auto True = MIB.buildConstant(i1Ty, 1); + Cond = MIB.buildInstr(TargetOpcode::G_XOR, {i1Ty}, {Cond, True}, None) + .getReg(0); + } + + MIB.buildBrCond(Cond, *CB.TrueBB); + MIB.buildBr(*CB.FalseBB); + MIB.setDebugLoc(OldDbgLoc); +} + +bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W, + MachineBasicBlock *SwitchMBB, + MachineBasicBlock *CurMBB, + MachineBasicBlock *DefaultMBB, + MachineIRBuilder &MIB, + MachineFunction::iterator BBI, + BranchProbability UnhandledProbs, + SwitchCG::CaseClusterIt I, + MachineBasicBlock *Fallthrough, + bool FallthroughUnreachable) { + using namespace SwitchCG; + MachineFunction *CurMF = SwitchMBB->getParent(); + // FIXME: Optimize away range check based on pivot comparisons. + JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first; + SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second; + BranchProbability DefaultProb = W.DefaultProb; + + // The jump block hasn't been inserted yet; insert it here. 
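
Before the jump table work item continues below, one note on the range case in emitSwitchCase above: it leans on a standard unsigned-compare trick, restated standalone here (plain C++, hypothetical helper):

    #include <cstdint>

    // Low <= X && X <= High collapses into one unsigned compare, matching the
    // buildSub/ICMP_ULE pair above: X - Low wraps past High - Low exactly when
    // X < Low, so a single comparison checks both bounds.
    bool inRange(uint64_t X, uint64_t Low, uint64_t High) {
      return X - Low <= High - Low;
    }

The isMinValue special case skips the subtraction entirely, since X u<= High already covers the whole range when Low is the smallest possible value.
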
+  MachineBasicBlock *JumpMBB = JT->MBB;
+  CurMF->insert(BBI, JumpMBB);
+
+  // Since the jump table block is separate from the switch block, we need
+  // to keep track of it as a machine predecessor to the default block,
+  // otherwise we lose the phi edges.
+  addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
+                    CurMBB);
+  addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
+                    JumpMBB);
+
+  auto JumpProb = I->Prob;
+  auto FallthroughProb = UnhandledProbs;
+
+  // If the default statement is a target of the jump table, we evenly
+  // distribute the default probability to successors of CurMBB. Also
+  // update the probability on the edge from JumpMBB to Fallthrough.
+  for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+                                        SE = JumpMBB->succ_end();
+       SI != SE; ++SI) {
+    if (*SI == DefaultMBB) {
+      JumpProb += DefaultProb / 2;
+      FallthroughProb -= DefaultProb / 2;
+      JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+      JumpMBB->normalizeSuccProbs();
+    } else {
+      // Also record edges from the jump table block to its successors.
+      addMachineCFGPred({SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
+                        JumpMBB);
+    }
+  }
+
+  // Skip the range check if the fallthrough block is unreachable.
+  if (FallthroughUnreachable)
+    JTH->OmitRangeCheck = true;
+
+  if (!JTH->OmitRangeCheck)
+    addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+  addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+  CurMBB->normalizeSuccProbs();
+
+  // The jump table header will be inserted in our current block; it will do
+  // the range check and fall through to our fallthrough block.
+  JTH->HeaderBB = CurMBB;
+  JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+  // If we're in the right place, emit the jump table header right now.
+  if (CurMBB == SwitchMBB) {
+    if (!emitJumpTableHeader(*JT, *JTH, CurMBB))
+      return false;
+    JTH->Emitted = true;
+  }
+  return true;
+}
+bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
+                                            Value *Cond,
+                                            MachineBasicBlock *Fallthrough,
+                                            bool FallthroughUnreachable,
+                                            BranchProbability UnhandledProbs,
+                                            MachineBasicBlock *CurMBB,
+                                            MachineIRBuilder &MIB,
+                                            MachineBasicBlock *SwitchMBB) {
+  using namespace SwitchCG;
+  const Value *RHS, *LHS, *MHS;
+  CmpInst::Predicate Pred;
+  if (I->Low == I->High) {
+    // Check Cond == I->Low.
+    Pred = CmpInst::ICMP_EQ;
+    LHS = Cond;
+    RHS = I->Low;
+    MHS = nullptr;
+  } else {
+    // Check I->Low <= Cond <= I->High.
+    Pred = CmpInst::ICMP_ULE;
+    LHS = I->Low;
+    MHS = Cond;
+    RHS = I->High;
+  }
+
+  // If Fallthrough is unreachable, fold away the comparison.
+  // The false probability is the sum of all unhandled cases.
+  CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough,
+               CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs);
+
+  emitSwitchCase(CB, SwitchMBB, MIB);
+  return true;
+}
+
+bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
+                                       Value *Cond,
+                                       MachineBasicBlock *SwitchMBB,
+                                       MachineBasicBlock *DefaultMBB,
+                                       MachineIRBuilder &MIB) {
+  using namespace SwitchCG;
+  MachineFunction *CurMF = FuncInfo.MF;
+  MachineBasicBlock *NextMBB = nullptr;
+  MachineFunction::iterator BBI(W.MBB);
+  if (++BBI != FuncInfo.MF->end())
+    NextMBB = &*BBI;
+
+  if (EnableOpts) {
+    // Here, we order cases by probability so the most likely case will be
+    // checked first. However, two clusters can have the same probability in
+    // which case their relative ordering is non-deterministic.
So we use Low + // as a tie-breaker as clusters are guaranteed to never overlap. + llvm::sort(W.FirstCluster, W.LastCluster + 1, + [](const CaseCluster &a, const CaseCluster &b) { + return a.Prob != b.Prob + ? a.Prob > b.Prob + : a.Low->getValue().slt(b.Low->getValue()); + }); + + // Rearrange the case blocks so that the last one falls through if possible + // without changing the order of probabilities. + for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) { + --I; + if (I->Prob > W.LastCluster->Prob) + break; + if (I->Kind == CC_Range && I->MBB == NextMBB) { + std::swap(*I, *W.LastCluster); + break; + } + } + } + + // Compute total probability. + BranchProbability DefaultProb = W.DefaultProb; + BranchProbability UnhandledProbs = DefaultProb; + for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) + UnhandledProbs += I->Prob; + + MachineBasicBlock *CurMBB = W.MBB; + for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { + bool FallthroughUnreachable = false; + MachineBasicBlock *Fallthrough; + if (I == W.LastCluster) { + // For the last cluster, fall through to the default destination. + Fallthrough = DefaultMBB; + FallthroughUnreachable = isa<UnreachableInst>( + DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg()); + } else { + Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock()); + CurMF->insert(BBI, Fallthrough); + } + UnhandledProbs -= I->Prob; + + switch (I->Kind) { + case CC_BitTests: { + LLVM_DEBUG(dbgs() << "Switch to bit test optimization unimplemented"); + return false; // Bit tests currently unimplemented. + } + case CC_JumpTable: { + if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI, + UnhandledProbs, I, Fallthrough, + FallthroughUnreachable)) { + LLVM_DEBUG(dbgs() << "Failed to lower jump table"); + return false; + } + break; + } + case CC_Range: { + if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough, + FallthroughUnreachable, UnhandledProbs, + CurMBB, MIB, SwitchMBB)) { + LLVM_DEBUG(dbgs() << "Failed to lower switch range"); + return false; + } + break; + } + } + CurMBB = Fallthrough; + } return true; } @@ -465,7 +835,7 @@ bool IRTranslator::translateIndirectBr(const User &U, MachineIRBuilder &MIRBuilder) { const IndirectBrInst &BrInst = cast<IndirectBrInst>(U); - const unsigned Tgt = getOrCreateVReg(*BrInst.getAddress()); + const Register Tgt = getOrCreateVReg(*BrInst.getAddress()); MIRBuilder.buildBrIndirect(Tgt); // Link successors. 
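
The cluster ordering chosen by lowerSwitchWorkItem above can be restated standalone (plain C++; Prob is modeled as a double rather than BranchProbability, and Low as a plain integer):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Cluster { double Prob; int64_t Low; };

    // Most probable cluster first; ties fall back to the low bound, which is
    // unique because clusters never overlap, making the order deterministic.
    void orderClusters(std::vector<Cluster> &Cs) {
      std::sort(Cs.begin(), Cs.end(), [](const Cluster &A, const Cluster &B) {
        return A.Prob != B.Prob ? A.Prob > B.Prob : A.Low < B.Low;
      });
    }
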
@@ -476,6 +846,14 @@ bool IRTranslator::translateIndirectBr(const User &U, return true; } +static bool isSwiftError(const Value *V) { + if (auto Arg = dyn_cast<Argument>(V)) + return Arg->hasSwiftErrorAttr(); + if (auto AI = dyn_cast<AllocaInst>(V)) + return AI->isSwiftError(); + return false; +} + bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { const LoadInst &LI = cast<LoadInst>(U); @@ -486,13 +864,25 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { if (DL->getTypeStoreSize(LI.getType()) == 0) return true; - ArrayRef<unsigned> Regs = getOrCreateVRegs(LI); + ArrayRef<Register> Regs = getOrCreateVRegs(LI); ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI); - unsigned Base = getOrCreateVReg(*LI.getPointerOperand()); + Register Base = getOrCreateVReg(*LI.getPointerOperand()); + + Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType()); + LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); + + if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) { + assert(Regs.size() == 1 && "swifterror should be single pointer"); + Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(), + LI.getPointerOperand()); + MIRBuilder.buildCopy(Regs[0], VReg); + return true; + } + for (unsigned i = 0; i < Regs.size(); ++i) { - unsigned Addr = 0; - MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8); + Register Addr; + MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8); MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8); unsigned BaseAlign = getMemOpAlignment(LI); @@ -515,13 +905,25 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0) return true; - ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand()); + ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand()); ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand()); - unsigned Base = getOrCreateVReg(*SI.getPointerOperand()); + Register Base = getOrCreateVReg(*SI.getPointerOperand()); + + Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType()); + LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); + + if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) { + assert(Vals.size() == 1 && "swifterror should be single pointer"); + + Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(), + SI.getPointerOperand()); + MIRBuilder.buildCopy(VReg, Vals[0]); + return true; + } for (unsigned i = 0; i < Vals.size(); ++i) { - unsigned Addr = 0; - MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8); + Register Addr; + MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8); MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8); unsigned BaseAlign = getMemOpAlignment(SI); @@ -562,10 +964,9 @@ bool IRTranslator::translateExtractValue(const User &U, MachineIRBuilder &MIRBuilder) { const Value *Src = U.getOperand(0); uint64_t Offset = getOffsetFromIndices(U, *DL); - ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src); + ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src); ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src); - unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) - - Offsets.begin(); + unsigned Idx = llvm::lower_bound(Offsets, Offset) - Offsets.begin(); auto &DstRegs = allocateVRegs(U); for (unsigned i = 0; i < DstRegs.size(); ++i) @@ -580,8 +981,8 @@ bool IRTranslator::translateInsertValue(const User 
&U, uint64_t Offset = getOffsetFromIndices(U, *DL); auto &DstRegs = allocateVRegs(U); ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U); - ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src); - ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1)); + ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src); + ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1)); auto InsertedIt = InsertedRegs.begin(); for (unsigned i = 0; i < DstRegs.size(); ++i) { @@ -596,19 +997,19 @@ bool IRTranslator::translateInsertValue(const User &U, bool IRTranslator::translateSelect(const User &U, MachineIRBuilder &MIRBuilder) { - unsigned Tst = getOrCreateVReg(*U.getOperand(0)); - ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U); - ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1)); - ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2)); + Register Tst = getOrCreateVReg(*U.getOperand(0)); + ArrayRef<Register> ResRegs = getOrCreateVRegs(U); + ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1)); + ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2)); const SelectInst &SI = cast<SelectInst>(U); - const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition()); + uint16_t Flags = 0; + if (const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition())) + Flags = MachineInstr::copyFlagsFromInstruction(*Cmp); + for (unsigned i = 0; i < ResRegs.size(); ++i) { - auto Select = - MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]); - if (Cmp && isa<FPMathOperator>(Cmp)) { - Select->copyIRFlags(*Cmp); - } + MIRBuilder.buildInstr(TargetOpcode::G_SELECT, {ResRegs[i]}, + {Tst, Op0Regs[i], Op1Regs[i]}, Flags); } return true; @@ -619,7 +1020,7 @@ bool IRTranslator::translateBitCast(const User &U, // If we're bitcasting to the source type, we can reuse the source vreg. if (getLLTForType(*U.getOperand(0)->getType(), *DL) == getLLTForType(*U.getType(), *DL)) { - unsigned SrcReg = getOrCreateVReg(*U.getOperand(0)); + Register SrcReg = getOrCreateVReg(*U.getOperand(0)); auto &Regs = *VMap.getVRegs(U); // If we already assigned a vreg for this bitcast, we can't change that. // Emit a copy to satisfy the users we already emitted. 
@@ -636,9 +1037,9 @@ bool IRTranslator::translateBitCast(const User &U, bool IRTranslator::translateCast(unsigned Opcode, const User &U, MachineIRBuilder &MIRBuilder) { - unsigned Op = getOrCreateVReg(*U.getOperand(0)); - unsigned Res = getOrCreateVReg(U); - MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op); + Register Op = getOrCreateVReg(*U.getOperand(0)); + Register Res = getOrCreateVReg(U); + MIRBuilder.buildInstr(Opcode, {Res}, {Op}); return true; } @@ -649,7 +1050,7 @@ bool IRTranslator::translateGetElementPtr(const User &U, return false; Value &Op0 = *U.getOperand(0); - unsigned BaseReg = getOrCreateVReg(Op0); + Register BaseReg = getOrCreateVReg(Op0); Type *PtrIRTy = Op0.getType(); LLT PtrTy = getLLTForType(*PtrIRTy, *DL); Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy); @@ -674,43 +1075,43 @@ bool IRTranslator::translateGetElementPtr(const User &U, } if (Offset != 0) { - unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); - unsigned OffsetReg = - getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset)); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg); + Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); + LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); + auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset); + MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0)); BaseReg = NewBaseReg; Offset = 0; } - unsigned IdxReg = getOrCreateVReg(*Idx); + Register IdxReg = getOrCreateVReg(*Idx); if (MRI->getType(IdxReg) != OffsetTy) { - unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); + Register NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg); IdxReg = NewIdxReg; } // N = N + Idx * ElementSize; // Avoid doing it for ElementSize of 1. - unsigned GepOffsetReg; + Register GepOffsetReg; if (ElementSize != 1) { - unsigned ElementSizeReg = - getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize)); - GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy); - MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg); + auto ElementSizeMIB = MIRBuilder.buildConstant( + getLLTForType(*OffsetIRTy, *DL), ElementSize); + MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg); } else GepOffsetReg = IdxReg; - unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); + Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg); BaseReg = NewBaseReg; } } if (Offset != 0) { - unsigned OffsetReg = getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset)); - MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg); + auto OffsetMIB = + MIRBuilder.buildConstant(getLLTForType(*OffsetIRTy, *DL), Offset); + MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0)); return true; } @@ -721,6 +1122,19 @@ bool IRTranslator::translateGetElementPtr(const User &U, bool IRTranslator::translateMemfunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, unsigned ID) { + + // If the source is undef, then just emit a nop. 
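
Looking back at translateGetElementPtr above, the address it builds has a simple closed form, sketched standalone below (plain C++; the names are hypothetical and a 64-bit target is assumed). Constant indices fold into a single running offset, while each dynamic index contributes one scaled term:

    #include <cstddef>
    #include <cstdint>

    uintptr_t gepAddress(uintptr_t Base, int64_t ConstOffset,
                         const int64_t *Idx, const uint64_t *EltSize,
                         size_t N) {
      uintptr_t Addr = Base + ConstOffset; // folded constant part, one G_GEP
      for (size_t I = 0; I != N; ++I)
        Addr += Idx[I] * EltSize[I];       // G_MUL (skipped for size 1) + G_GEP
      return Addr;
    }
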
+ if (isa<UndefValue>(CI.getArgOperand(1))) { + switch (ID) { + case Intrinsic::memmove: + case Intrinsic::memcpy: + case Intrinsic::memset: + return true; + default: + break; + } + } + LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL); Type *DstTy = CI.getArgOperand(0)->getType(); if (cast<PointerType>(DstTy)->getAddressSpace() != 0 || @@ -752,10 +1166,10 @@ bool IRTranslator::translateMemfunc(const CallInst &CI, return CLI->lowerCall(MIRBuilder, CI.getCallingConv(), MachineOperand::CreateES(Callee), - CallLowering::ArgInfo(0, CI.getType()), Args); + CallLowering::ArgInfo({0}, CI.getType()), Args); } -void IRTranslator::getStackGuard(unsigned DstReg, +void IRTranslator::getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF)); @@ -778,7 +1192,7 @@ void IRTranslator::getStackGuard(unsigned DstReg, bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, MachineIRBuilder &MIRBuilder) { - ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI); + ArrayRef<Register> ResRegs = getOrCreateVRegs(CI); MIRBuilder.buildInstr(Op) .addDef(ResRegs[0]) .addDef(ResRegs[1]) @@ -788,19 +1202,123 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, return true; } +unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { + switch (ID) { + default: + break; + case Intrinsic::bswap: + return TargetOpcode::G_BSWAP; + case Intrinsic::ceil: + return TargetOpcode::G_FCEIL; + case Intrinsic::cos: + return TargetOpcode::G_FCOS; + case Intrinsic::ctpop: + return TargetOpcode::G_CTPOP; + case Intrinsic::exp: + return TargetOpcode::G_FEXP; + case Intrinsic::exp2: + return TargetOpcode::G_FEXP2; + case Intrinsic::fabs: + return TargetOpcode::G_FABS; + case Intrinsic::copysign: + return TargetOpcode::G_FCOPYSIGN; + case Intrinsic::minnum: + return TargetOpcode::G_FMINNUM; + case Intrinsic::maxnum: + return TargetOpcode::G_FMAXNUM; + case Intrinsic::minimum: + return TargetOpcode::G_FMINIMUM; + case Intrinsic::maximum: + return TargetOpcode::G_FMAXIMUM; + case Intrinsic::canonicalize: + return TargetOpcode::G_FCANONICALIZE; + case Intrinsic::floor: + return TargetOpcode::G_FFLOOR; + case Intrinsic::fma: + return TargetOpcode::G_FMA; + case Intrinsic::log: + return TargetOpcode::G_FLOG; + case Intrinsic::log2: + return TargetOpcode::G_FLOG2; + case Intrinsic::log10: + return TargetOpcode::G_FLOG10; + case Intrinsic::nearbyint: + return TargetOpcode::G_FNEARBYINT; + case Intrinsic::pow: + return TargetOpcode::G_FPOW; + case Intrinsic::rint: + return TargetOpcode::G_FRINT; + case Intrinsic::round: + return TargetOpcode::G_INTRINSIC_ROUND; + case Intrinsic::sin: + return TargetOpcode::G_FSIN; + case Intrinsic::sqrt: + return TargetOpcode::G_FSQRT; + case Intrinsic::trunc: + return TargetOpcode::G_INTRINSIC_TRUNC; + } + return Intrinsic::not_intrinsic; +} + +bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI, + Intrinsic::ID ID, + MachineIRBuilder &MIRBuilder) { + + unsigned Op = getSimpleIntrinsicOpcode(ID); + + // Is this a simple intrinsic? + if (Op == Intrinsic::not_intrinsic) + return false; + + // Yes. Let's translate it. 
+  SmallVector<llvm::SrcOp, 4> VRegs;
+  for (auto &Arg : CI.arg_operands())
+    VRegs.push_back(getOrCreateVReg(*Arg));
+
+  MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
+                        MachineInstr::copyFlagsFromInstruction(CI));
+  return true;
+}
+
 bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                            MachineIRBuilder &MIRBuilder) {
+
+  // If this is a simple intrinsic (that is, one that just needs a def of a
+  // vreg and uses for each argument operand), then translate it.
+  if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
+    return true;
+
   switch (ID) {
   default:
     break;
   case Intrinsic::lifetime_start:
-  case Intrinsic::lifetime_end:
-    // Stack coloring is not enabled in O0 (which we care about now) so we can
-    // drop these. Make sure someone notices when we start compiling at higher
-    // opts though.
-    if (MF->getTarget().getOptLevel() != CodeGenOpt::None)
-      return false;
+  case Intrinsic::lifetime_end: {
+    // No stack colouring in O0, discard region information.
+    if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
+      return true;
+
+    unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
+                                                  : TargetOpcode::LIFETIME_END;
+
+    // Get the underlying objects for the location passed on the lifetime
+    // marker.
+    SmallVector<const Value *, 4> Allocas;
+    GetUnderlyingObjects(CI.getArgOperand(1), Allocas, *DL);
+
+    // Iterate over each underlying object, creating lifetime markers for each
+    // static alloca. Quit if we find a non-static alloca.
+    for (const Value *V : Allocas) {
+      const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+      if (!AI)
+        continue;
+
+      if (!AI->isStaticAlloca())
+        return true;
+
+      MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
+    }
     return true;
+  }
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
     assert(DI.getVariable() && "Missing variable");
@@ -848,10 +1366,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     Value *Ptr = CI.getArgOperand(0);
     unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;

+    // FIXME: Get alignment
     MIRBuilder.buildInstr(TargetOpcode::G_VASTART)
         .addUse(getOrCreateVReg(*Ptr))
         .addMemOperand(MF->getMachineMemOperand(
-            MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 0));
+            MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 1));
     return true;
   }
   case Intrinsic::dbg_value: {
@@ -868,7 +1387,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     } else if (const auto *CI = dyn_cast<Constant>(V)) {
       MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
     } else {
-      unsigned Reg = getOrCreateVReg(*V);
+      Register Reg = getOrCreateVReg(*V);
       // FIXME: This does not handle register-indirect values at offset 0. The
      // direct/indirect thing shouldn't really be handled by something as
      // implicit as reg+noreg vs reg+imm in the first place, but it seems
@@ -889,94 +1408,25 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
   case Intrinsic::smul_with_overflow:
     return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
-  case Intrinsic::pow: {
-    auto Pow = MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
-                   .addDef(getOrCreateVReg(CI))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
-    Pow->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::exp: {
-    auto Exp = MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
-                   .addDef(getOrCreateVReg(CI))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Exp->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::exp2: {
-    auto Exp2 = MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
-                    .addDef(getOrCreateVReg(CI))
-                    .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Exp2->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::log: {
-    auto Log = MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
-                   .addDef(getOrCreateVReg(CI))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Log->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::log2: {
-    auto Log2 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
-                    .addDef(getOrCreateVReg(CI))
-                    .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Log2->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::log10: {
-    auto Log10 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG10)
-                     .addDef(getOrCreateVReg(CI))
-                     .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Log10->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::fabs: {
-    auto Fabs = MIRBuilder.buildInstr(TargetOpcode::G_FABS)
-                    .addDef(getOrCreateVReg(CI))
-                    .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Fabs->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::trunc:
-    MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    return true;
-  case Intrinsic::round:
-    MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    return true;
-  case Intrinsic::fma: {
-    auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA)
-                   .addDef(getOrCreateVReg(CI))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(1)))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(2)));
-    FMA->copyIRFlags(CI);
-    return true;
-  }
   case Intrinsic::fmuladd: {
     const TargetMachine &TM = MF->getTarget();
     const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
-    unsigned Dst = getOrCreateVReg(CI);
-    unsigned Op0 = getOrCreateVReg(*CI.getArgOperand(0));
-    unsigned Op1 = getOrCreateVReg(*CI.getArgOperand(1));
-    unsigned Op2 = getOrCreateVReg(*CI.getArgOperand(2));
+    Register Dst = getOrCreateVReg(CI);
+    Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
+    Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
+    Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
         TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
       // TODO: Revisit this to see if we should move this part of the
       // lowering to the combiner.
- auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2}); - FMA->copyIRFlags(CI); + MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2}, + MachineInstr::copyFlagsFromInstruction(CI)); } else { LLT Ty = getLLTForType(*CI.getType(), *DL); - auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1}); - FMul->copyIRFlags(CI); - auto FAdd = MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2}); - FAdd->copyIRFlags(CI); + auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1}, + MachineInstr::copyFlagsFromInstruction(CI)); + MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2}, + MachineInstr::copyFlagsFromInstruction(CI)); } return true; } @@ -986,7 +1436,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return translateMemfunc(CI, MIRBuilder, ID); case Intrinsic::eh_typeid_for: { GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0)); - unsigned Reg = getOrCreateVReg(CI); + Register Reg = getOrCreateVReg(CI); unsigned TypeID = MF->getTypeIDFor(GV); MIRBuilder.buildConstant(Reg, TypeID); return true; @@ -1008,7 +1458,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return true; case Intrinsic::stackprotector: { LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL); - unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy); + Register GuardVal = MRI->createGenericVirtualRegister(PtrTy); getStackGuard(GuardVal, MIRBuilder); AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1)); @@ -1023,6 +1473,34 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, PtrTy.getSizeInBits() / 8, 8)); return true; } + case Intrinsic::stacksave: { + // Save the stack pointer to the location provided by the intrinsic. + Register Reg = getOrCreateVReg(CI); + Register StackPtr = MF->getSubtarget() + .getTargetLowering() + ->getStackPointerRegisterToSaveRestore(); + + // If the target doesn't specify a stack pointer, then fall back. + if (!StackPtr) + return false; + + MIRBuilder.buildCopy(Reg, StackPtr); + return true; + } + case Intrinsic::stackrestore: { + // Restore the stack pointer from the location provided by the intrinsic. + Register Reg = getOrCreateVReg(*CI.getArgOperand(0)); + Register StackPtr = MF->getSubtarget() + .getTargetLowering() + ->getStackPointerRegisterToSaveRestore(); + + // If the target doesn't specify a stack pointer, then fall back. 
+ if (!StackPtr) + return false; + + MIRBuilder.buildCopy(StackPtr, Reg); + return true; + } case Intrinsic::cttz: case Intrinsic::ctlz: { ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1)); @@ -1037,24 +1515,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addUse(getOrCreateVReg(*CI.getArgOperand(0))); return true; } - case Intrinsic::ctpop: { - MIRBuilder.buildInstr(TargetOpcode::G_CTPOP) - .addDef(getOrCreateVReg(CI)) - .addUse(getOrCreateVReg(*CI.getArgOperand(0))); - return true; - } case Intrinsic::invariant_start: { LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL); - unsigned Undef = MRI->createGenericVirtualRegister(PtrTy); + Register Undef = MRI->createGenericVirtualRegister(PtrTy); MIRBuilder.buildUndef(Undef); return true; } case Intrinsic::invariant_end: return true; - case Intrinsic::ceil: - MIRBuilder.buildInstr(TargetOpcode::G_FCEIL) - .addDef(getOrCreateVReg(CI)) - .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + case Intrinsic::assume: + case Intrinsic::var_annotation: + case Intrinsic::sideeffect: + // Discard annotate attributes, assumptions, and artificial side-effects. return true; } return false; @@ -1079,34 +1551,6 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI, return true; } -unsigned IRTranslator::packRegs(const Value &V, - MachineIRBuilder &MIRBuilder) { - ArrayRef<unsigned> Regs = getOrCreateVRegs(V); - ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V); - LLT BigTy = getLLTForType(*V.getType(), *DL); - - if (Regs.size() == 1) - return Regs[0]; - - unsigned Dst = MRI->createGenericVirtualRegister(BigTy); - MIRBuilder.buildUndef(Dst); - for (unsigned i = 0; i < Regs.size(); ++i) { - unsigned NewDst = MRI->createGenericVirtualRegister(BigTy); - MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]); - Dst = NewDst; - } - return Dst; -} - -void IRTranslator::unpackRegs(const Value &V, unsigned Src, - MachineIRBuilder &MIRBuilder) { - ArrayRef<unsigned> Regs = getOrCreateVRegs(V); - ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V); - - for (unsigned i = 0; i < Regs.size(); ++i) - MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]); -} - bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { const CallInst &CI = cast<CallInst>(U); auto TII = MF->getTarget().getIntrinsicInfo(); @@ -1126,23 +1570,32 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F)); } - bool IsSplitType = valueIsSplit(CI); if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) { - unsigned Res = IsSplitType ? 
MRI->createGenericVirtualRegister( - getLLTForType(*CI.getType(), *DL)) - : getOrCreateVReg(CI); - - SmallVector<unsigned, 8> Args; - for (auto &Arg: CI.arg_operands()) - Args.push_back(packRegs(*Arg, MIRBuilder)); + ArrayRef<Register> Res = getOrCreateVRegs(CI); + + SmallVector<ArrayRef<Register>, 8> Args; + Register SwiftInVReg = 0; + Register SwiftErrorVReg = 0; + for (auto &Arg: CI.arg_operands()) { + if (CLI->supportSwiftError() && isSwiftError(Arg)) { + assert(SwiftInVReg == 0 && "Expected only one swift error argument"); + LLT Ty = getLLTForType(*Arg->getType(), *DL); + SwiftInVReg = MRI->createGenericVirtualRegister(Ty); + MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( + &CI, &MIRBuilder.getMBB(), Arg)); + Args.emplace_back(makeArrayRef(SwiftInVReg)); + SwiftErrorVReg = + SwiftError.getOrCreateVRegDefAt(&CI, &MIRBuilder.getMBB(), Arg); + continue; + } + Args.push_back(getOrCreateVRegs(*Arg)); + } MF->getFrameInfo().setHasCalls(true); - bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() { - return getOrCreateVReg(*CI.getCalledValue()); - }); + bool Success = + CLI->lowerCall(MIRBuilder, &CI, Res, Args, SwiftErrorVReg, + [&]() { return getOrCreateVReg(*CI.getCalledValue()); }); - if (IsSplitType) - unpackRegs(CI, Res, MIRBuilder); return Success; } @@ -1151,35 +1604,39 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (translateKnownIntrinsic(CI, ID, MIRBuilder)) return true; - unsigned Res = 0; - if (!CI.getType()->isVoidTy()) { - if (IsSplitType) - Res = - MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL)); - else - Res = getOrCreateVReg(CI); - } + ArrayRef<Register> ResultRegs; + if (!CI.getType()->isVoidTy()) + ResultRegs = getOrCreateVRegs(CI); + + // Ignore the callsite attributes. Backend code is most likely not expecting + // an intrinsic to sometimes have side effects and sometimes not. MachineInstrBuilder MIB = - MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory()); + MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory()); + if (isa<FPMathOperator>(CI)) + MIB->copyIRFlags(CI); for (auto &Arg : CI.arg_operands()) { // Some intrinsics take metadata parameters. Reject them. if (isa<MetadataAsValue>(Arg)) return false; - MIB.addUse(packRegs(*Arg, MIRBuilder)); + ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg); + if (VRegs.size() > 1) + return false; + MIB.addUse(VRegs[0]); } - if (IsSplitType) - unpackRegs(CI, Res, MIRBuilder); - // Add a MachineMemOperand if it is a target mem intrinsic. const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); TargetLowering::IntrinsicInfo Info; // TODO: Add a GlobalISel version of getTgtMemIntrinsic. 
if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { + unsigned Align = Info.align; + if (Align == 0) + Align = DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext())); + uint64_t Size = Info.memVT.getStoreSize(); MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Info.flags, Size, Info.align)); + Info.flags, Size, Align)); } return true; @@ -1215,18 +1672,32 @@ bool IRTranslator::translateInvoke(const User &U, MCSymbol *BeginSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); - unsigned Res = - MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL)); - SmallVector<unsigned, 8> Args; - for (auto &Arg: I.arg_operands()) - Args.push_back(packRegs(*Arg, MIRBuilder)); + ArrayRef<Register> Res; + if (!I.getType()->isVoidTy()) + Res = getOrCreateVRegs(I); + SmallVector<ArrayRef<Register>, 8> Args; + Register SwiftErrorVReg = 0; + Register SwiftInVReg = 0; + for (auto &Arg : I.arg_operands()) { + if (CLI->supportSwiftError() && isSwiftError(Arg)) { + assert(SwiftInVReg == 0 && "Expected only one swift error argument"); + LLT Ty = getLLTForType(*Arg->getType(), *DL); + SwiftInVReg = MRI->createGenericVirtualRegister(Ty); + MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( + &I, &MIRBuilder.getMBB(), Arg)); + Args.push_back(makeArrayRef(SwiftInVReg)); + SwiftErrorVReg = + SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg); + continue; + } + + Args.push_back(getOrCreateVRegs(*Arg)); + } - if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, + if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, SwiftErrorVReg, [&]() { return getOrCreateVReg(*I.getCalledValue()); })) return false; - unpackRegs(I, Res, MIRBuilder); - MCSymbol *EndSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); @@ -1241,6 +1712,12 @@ bool IRTranslator::translateInvoke(const User &U, return true; } +bool IRTranslator::translateCallBr(const User &U, + MachineIRBuilder &MIRBuilder) { + // FIXME: Implement this. + return false; +} + bool IRTranslator::translateLandingPad(const User &U, MachineIRBuilder &MIRBuilder) { const LandingPadInst &LP = cast<LandingPadInst>(U); @@ -1270,7 +1747,7 @@ bool IRTranslator::translateLandingPad(const User &U, .addSym(MF->addLandingPad(&MBB)); LLT Ty = getLLTForType(*LP.getType(), *DL); - unsigned Undef = MRI->createGenericVirtualRegister(Ty); + Register Undef = MRI->createGenericVirtualRegister(Ty); MIRBuilder.buildUndef(Undef); SmallVector<LLT, 2> Tys; @@ -1279,20 +1756,20 @@ bool IRTranslator::translateLandingPad(const User &U, assert(Tys.size() == 2 && "Only two-valued landingpads are supported"); // Mark exception register as live in. 
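
For orientation, the invoke lowering above brackets the lowered call between two EH_LABELs so the exception table can describe the call site. Condensed (MF->addInvoke is the MachineFunction hook the full function uses; it falls outside the hunks shown, so treat this as a sketch):

    // EH_LABELs delimit the potentially-throwing call; the labels plus the
    // landing pad's MBB feed the call-site ranges of the EH table.
    MCSymbol *BeginSymbol = Context.createTempSymbol();
    MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
    // ... CLI->lowerCall(...) as above, swifterror vregs included ...
    MCSymbol *EndSymbol = Context.createTempSymbol();
    MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
    MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol); // EHPadMBB: unwind dest
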
- unsigned ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn); + Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn); if (!ExceptionReg) return false; MBB.addLiveIn(ExceptionReg); - ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP); + ArrayRef<Register> ResRegs = getOrCreateVRegs(LP); MIRBuilder.buildCopy(ResRegs[0], ExceptionReg); - unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn); + Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn); if (!SelectorReg) return false; MBB.addLiveIn(SelectorReg); - unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]); + Register PtrVReg = MRI->createGenericVirtualRegister(Tys[0]); MIRBuilder.buildCopy(PtrVReg, SelectorReg); MIRBuilder.buildCast(ResRegs[1], PtrVReg); @@ -1304,10 +1781,10 @@ bool IRTranslator::translateAlloca(const User &U, auto &AI = cast<AllocaInst>(U); if (AI.isSwiftError()) - return false; + return true; if (AI.isStaticAlloca()) { - unsigned Res = getOrCreateVReg(AI); + Register Res = getOrCreateVReg(AI); int FI = getOrCreateFrameIndex(AI); MIRBuilder.buildFrameIndex(Res, FI); return true; @@ -1322,29 +1799,29 @@ bool IRTranslator::translateAlloca(const User &U, unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment()); - unsigned NumElts = getOrCreateVReg(*AI.getArraySize()); + Register NumElts = getOrCreateVReg(*AI.getArraySize()); Type *IntPtrIRTy = DL->getIntPtrType(AI.getType()); LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL); if (MRI->getType(NumElts) != IntPtrTy) { - unsigned ExtElts = MRI->createGenericVirtualRegister(IntPtrTy); + Register ExtElts = MRI->createGenericVirtualRegister(IntPtrTy); MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts); NumElts = ExtElts; } - unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy); - unsigned TySize = + Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy); + Register TySize = getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty))); MIRBuilder.buildMul(AllocSize, NumElts, TySize); LLT PtrTy = getLLTForType(*AI.getType(), *DL); auto &TLI = *MF->getSubtarget().getTargetLowering(); - unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); + Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); - unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy); + Register SPTmp = MRI->createGenericVirtualRegister(PtrTy); MIRBuilder.buildCopy(SPTmp, SPReg); - unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy); + Register AllocTmp = MRI->createGenericVirtualRegister(PtrTy); MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize); // Handle alignment. We have to realign if the allocation granule was smaller @@ -1357,7 +1834,7 @@ bool IRTranslator::translateAlloca(const User &U, // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. This doesn't overflow because we're computing // an address inside an alloca. - unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy); + Register AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy); MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align)); AllocTmp = AlignedAlloc; } @@ -1387,7 +1864,7 @@ bool IRTranslator::translateInsertElement(const User &U, // If it is a <1 x Ty> vector, use the scalar as it is // not a legal vector type in LLT. 
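
LLT deliberately has no <1 x Ty> vector type, so single-element vectors are treated as plain scalars throughout the translator; the guard that follows is one instance of that convention. A quick illustration of the mapping, where Ctx and DL stand for the usual LLVMContext and DataLayout:

    // getLLTForType collapses single-element vectors to their element type.
    LLT OneElt = getLLTForType(*VectorType::get(Type::getFloatTy(Ctx), 1), DL);
    assert(OneElt == LLT::scalar(32) && "<1 x float> is just s32 in LLT");
    LLT FourElt = getLLTForType(*VectorType::get(Type::getFloatTy(Ctx), 4), DL);
    assert(FourElt == LLT::vector(4, 32)); // real vectors keep their shape
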
if (U.getType()->getVectorNumElements() == 1) { - unsigned Elt = getOrCreateVReg(*U.getOperand(1)); + Register Elt = getOrCreateVReg(*U.getOperand(1)); auto &Regs = *VMap.getVRegs(U); if (Regs.empty()) { Regs.push_back(Elt); @@ -1398,10 +1875,10 @@ bool IRTranslator::translateInsertElement(const User &U, return true; } - unsigned Res = getOrCreateVReg(U); - unsigned Val = getOrCreateVReg(*U.getOperand(0)); - unsigned Elt = getOrCreateVReg(*U.getOperand(1)); - unsigned Idx = getOrCreateVReg(*U.getOperand(2)); + Register Res = getOrCreateVReg(U); + Register Val = getOrCreateVReg(*U.getOperand(0)); + Register Elt = getOrCreateVReg(*U.getOperand(1)); + Register Idx = getOrCreateVReg(*U.getOperand(2)); MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx); return true; } @@ -1411,7 +1888,7 @@ bool IRTranslator::translateExtractElement(const User &U, // If it is a <1 x Ty> vector, use the scalar as it is // not a legal vector type in LLT. if (U.getOperand(0)->getType()->getVectorNumElements() == 1) { - unsigned Elt = getOrCreateVReg(*U.getOperand(0)); + Register Elt = getOrCreateVReg(*U.getOperand(0)); auto &Regs = *VMap.getVRegs(U); if (Regs.empty()) { Regs.push_back(Elt); @@ -1421,11 +1898,11 @@ bool IRTranslator::translateExtractElement(const User &U, } return true; } - unsigned Res = getOrCreateVReg(U); - unsigned Val = getOrCreateVReg(*U.getOperand(0)); + Register Res = getOrCreateVReg(U); + Register Val = getOrCreateVReg(*U.getOperand(0)); const auto &TLI = *MF->getSubtarget().getTargetLowering(); unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits(); - unsigned Idx = 0; + Register Idx; if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) { if (CI->getBitWidth() != PreferredVecIdxWidth) { APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth); @@ -1481,11 +1958,11 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, Type *ValType = ResType->Type::getStructElementType(0); auto Res = getOrCreateVRegs(I); - unsigned OldValRes = Res[0]; - unsigned SuccessRes = Res[1]; - unsigned Addr = getOrCreateVReg(*I.getPointerOperand()); - unsigned Cmp = getOrCreateVReg(*I.getCompareOperand()); - unsigned NewVal = getOrCreateVReg(*I.getNewValOperand()); + Register OldValRes = Res[0]; + Register SuccessRes = Res[1]; + Register Addr = getOrCreateVReg(*I.getPointerOperand()); + Register Cmp = getOrCreateVReg(*I.getCompareOperand()); + Register NewVal = getOrCreateVReg(*I.getNewValOperand()); MIRBuilder.buildAtomicCmpXchgWithSuccess( OldValRes, SuccessRes, Addr, Cmp, NewVal, @@ -1507,9 +1984,9 @@ bool IRTranslator::translateAtomicRMW(const User &U, Type *ResType = I.getType(); - unsigned Res = getOrCreateVReg(I); - unsigned Addr = getOrCreateVReg(*I.getPointerOperand()); - unsigned Val = getOrCreateVReg(*I.getValOperand()); + Register Res = getOrCreateVReg(I); + Register Addr = getOrCreateVReg(*I.getPointerOperand()); + Register Val = getOrCreateVReg(*I.getValOperand()); unsigned Opcode = 0; switch (I.getOperation()) { @@ -1560,6 +2037,14 @@ bool IRTranslator::translateAtomicRMW(const User &U, return true; } +bool IRTranslator::translateFence(const User &U, + MachineIRBuilder &MIRBuilder) { + const FenceInst &Fence = cast<FenceInst>(U); + MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()), + Fence.getSyncScopeID()); + return true; +} + void IRTranslator::finishPendingPhis() { #ifndef NDEBUG DILocationVerifier Verifier; @@ -1569,27 +2054,20 @@ void IRTranslator::finishPendingPhis() { for (auto &Phi : PendingPHIs) { const PHINode *PI = Phi.first; 
ArrayRef<MachineInstr *> ComponentPHIs = Phi.second; + MachineBasicBlock *PhiMBB = ComponentPHIs[0]->getParent(); EntryBuilder->setDebugLoc(PI->getDebugLoc()); #ifndef NDEBUG Verifier.setCurrentInst(PI); #endif // ifndef NDEBUG - // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator - // won't create extra control flow here, otherwise we need to find the - // dominating predecessor here (or perhaps force the weirder IRTranslators - // to provide a simple boundary). - SmallSet<const BasicBlock *, 4> HandledPreds; - + SmallSet<const MachineBasicBlock *, 16> SeenPreds; for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) { auto IRPred = PI->getIncomingBlock(i); - if (HandledPreds.count(IRPred)) - continue; - - HandledPreds.insert(IRPred); - ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i)); + ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i)); for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) { - assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) && - "incorrect CFG at MachineBasicBlock level"); + if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred)) + continue; + SeenPreds.insert(Pred); for (unsigned j = 0; j < ValRegs.size(); ++j) { MachineInstrBuilder MIB(*MF, ComponentPHIs[j]); MIB.addUse(ValRegs[j]); @@ -1611,8 +2089,15 @@ bool IRTranslator::valueIsSplit(const Value &V, bool IRTranslator::translate(const Instruction &Inst) { CurBuilder->setDebugLoc(Inst.getDebugLoc()); - EntryBuilder->setDebugLoc(Inst.getDebugLoc()); - switch(Inst.getOpcode()) { + // We only emit constants into the entry block from here. To prevent jumpy + // debug behaviour set the line to 0. + if (const DebugLoc &DL = Inst.getDebugLoc()) + EntryBuilder->setDebugLoc( + DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt())); + else + EntryBuilder->setDebugLoc(DebugLoc()); + + switch (Inst.getOpcode()) { #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE: \ return translate##OPCODE(Inst, *CurBuilder.get()); @@ -1622,7 +2107,7 @@ bool IRTranslator::translate(const Instruction &Inst) { } } -bool IRTranslator::translate(const Constant &C, unsigned Reg) { +bool IRTranslator::translate(const Constant &C, Register Reg) { if (auto CI = dyn_cast<ConstantInt>(&C)) EntryBuilder->buildConstant(Reg, *CI); else if (auto CF = dyn_cast<ConstantFP>(&C)) @@ -1635,7 +2120,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { unsigned NullSize = DL->getTypeSizeInBits(C.getType()); auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize); auto *ZeroVal = ConstantInt::get(ZeroTy, 0); - unsigned ZeroReg = getOrCreateVReg(*ZeroVal); + Register ZeroReg = getOrCreateVReg(*ZeroVal); EntryBuilder->buildCast(Reg, ZeroReg); } else if (auto GV = dyn_cast<GlobalValue>(&C)) EntryBuilder->buildGlobalValue(Reg, GV); @@ -1645,7 +2130,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { // Return the scalar if it is a <1 x Ty> vector. if (CAZ->getNumElements() == 1) return translate(*CAZ->getElementValue(0u), Reg); - SmallVector<unsigned, 4> Ops; + SmallVector<Register, 4> Ops; for (unsigned i = 0; i < CAZ->getNumElements(); ++i) { Constant &Elt = *CAZ->getElementValue(i); Ops.push_back(getOrCreateVReg(Elt)); @@ -1655,7 +2140,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { // Return the scalar if it is a <1 x Ty> vector. 
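
The EntryBuilder change in translate(const Instruction &) above deserves a remark: constants are emitted into the entry block regardless of where they are used, so carrying the instruction's real line number would make single-stepping bounce into the entry block and back. Zeroing the line while keeping scope and inline-at is the usual DWARF "no source position" convention; condensed:

    // Preserve scope/inline-at so attribution stays with the right function,
    // but report line 0 for the hoisted entry-block constants.
    if (const DebugLoc &Loc = Inst.getDebugLoc())
      EntryBuilder->setDebugLoc(
          DebugLoc::get(0, 0, Loc.getScope(), Loc.getInlinedAt()));
    else
      EntryBuilder->setDebugLoc(DebugLoc()); // no location at all
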
if (CV->getNumElements() == 1) return translate(*CV->getElementAsConstant(0), Reg); - SmallVector<unsigned, 4> Ops; + SmallVector<Register, 4> Ops; for (unsigned i = 0; i < CV->getNumElements(); ++i) { Constant &Elt = *CV->getElementAsConstant(i); Ops.push_back(getOrCreateVReg(Elt)); @@ -1673,7 +2158,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { } else if (auto CV = dyn_cast<ConstantVector>(&C)) { if (CV->getNumOperands() == 1) return translate(*CV->getOperand(0), Reg); - SmallVector<unsigned, 4> Ops; + SmallVector<Register, 4> Ops; for (unsigned i = 0; i < CV->getNumOperands(); ++i) { Ops.push_back(getOrCreateVReg(*CV->getOperand(i))); } @@ -1686,6 +2171,17 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { return true; } +void IRTranslator::finalizeBasicBlock() { + for (auto &JTCase : SL->JTCases) { + // Emit header first, if it wasn't already emitted. + if (!JTCase.first.Emitted) + emitJumpTableHeader(JTCase.second, JTCase.first, JTCase.first.HeaderBB); + + emitJumpTable(JTCase.second, JTCase.second.MBB); + } + SL->JTCases.clear(); +} + void IRTranslator::finalizeFunction() { // Release the memory used by the different maps we // needed during the translation. @@ -1698,6 +2194,7 @@ void IRTranslator::finalizeFunction() { // destroying it twice (in ~IRTranslator() and ~LLVMContext()) EntryBuilder.reset(); CurBuilder.reset(); + FuncInfo.clear(); } bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { @@ -1710,13 +2207,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { // Set the CSEConfig and run the analysis. GISelCSEInfo *CSEInfo = nullptr; TPC = &getAnalysis<TargetPassConfig>(); - bool IsO0 = TPC->getOptLevel() == CodeGenOpt::Level::None; - // Disable CSE for O0. - bool EnableCSE = !IsO0 && EnableCSEInIRTranslator; + bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences() + ? EnableCSEInIRTranslator + : TPC->isGISelCSEEnabled(); + if (EnableCSE) { EntryBuilder = make_unique<CSEMIRBuilder>(CurMF); - std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>(); - CSEInfo = &Wrapper.get(std::move(Config)); + CSEInfo = &Wrapper.get(TPC->getCSEConfig()); EntryBuilder->setCSEInfo(CSEInfo); CurBuilder = make_unique<CSEMIRBuilder>(CurMF); CurBuilder->setCSEInfo(CSEInfo); @@ -1730,6 +2227,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MRI = &MF->getRegInfo(); DL = &F.getParent()->getDataLayout(); ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F); + FuncInfo.MF = MF; + FuncInfo.BPI = nullptr; + const auto &TLI = *MF->getSubtarget().getTargetLowering(); + const TargetMachine &TM = MF->getTarget(); + SL = make_unique<GISelSwitchLowering>(this, FuncInfo); + SL->init(TLI, TM, *DL); + + EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F); assert(PendingPHIs.empty() && "stale PHIs"); @@ -1749,6 +2254,10 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MF->push_back(EntryBB); EntryBuilder->setMBB(*EntryBB); + DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc(); + SwiftError.setFunction(CurMF); + SwiftError.createEntriesInEntryBlock(DbgLoc); + // Create all blocks, in IR order, to preserve the layout. for (const BasicBlock &BB: F) { auto *&MBB = BBToMBB[&BB]; @@ -1764,20 +2273,25 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { EntryBB->addSuccessor(&getMBB(F.front())); // Lower the actual args into this basic block. 
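
finalizeBasicBlock above drains SL->JTCases, the jump tables that the shared switch-lowering utility records while translating switch instructions. Each emitted table boils down to a G_JUMP_TABLE/G_BRJT pair in generic MIR; a hedged sketch, assuming the MachineIRBuilder jump-table helpers of this vintage and a 64-bit address space 0:

    // Roughly what emitJumpTable builds: materialize the table address, then
    // branch through it with the switch index.
    LLT PtrTy = LLT::pointer(0, 64);   // assumption: target pointer type
    auto Table = MIRBuilder.buildJumpTable(PtrTy, JTI);
    MIRBuilder.buildBrJT(Table.getReg(0), JTI, IndexReg);
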
- SmallVector<unsigned, 8> VRegArgs; + SmallVector<ArrayRef<Register>, 8> VRegArgs; for (const Argument &Arg: F.args()) { if (DL->getTypeStoreSize(Arg.getType()) == 0) continue; // Don't handle zero sized types. - VRegArgs.push_back( - MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL))); + ArrayRef<Register> VRegs = getOrCreateVRegs(Arg); + VRegArgs.push_back(VRegs); + + if (Arg.hasSwiftErrorAttr()) { + assert(VRegs.size() == 1 && "Too many vregs for Swift error"); + SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]); + } } // We don't currently support translating swifterror or swiftself functions. for (auto &Arg : F.args()) { - if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) { + if (Arg.hasSwiftSelfAttr()) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); - R << "unable to lower arguments due to swifterror/swiftself: " + R << "unable to lower arguments due to swiftself: " << ore::NV("Prototype", F.getType()); reportTranslationError(*MF, *TPC, *ORE, R); return false; @@ -1792,20 +2306,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { return false; } - auto ArgIt = F.arg_begin(); - for (auto &VArg : VRegArgs) { - // If the argument is an unsplit scalar then don't use unpackRegs to avoid - // creating redundant copies. - if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) { - auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt)); - assert(VRegs.empty() && "VRegs already populated?"); - VRegs.push_back(VArg); - } else { - unpackRegs(*ArgIt, VArg, *EntryBuilder.get()); - } - ArgIt++; - } - // Need to visit defs before uses when translating instructions. GISelObserverWrapper WrapperObserver; if (EnableCSE && CSEInfo) @@ -1845,6 +2345,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { reportTranslationError(*MF, *TPC, *ORE, R); return false; } + + finalizeBasicBlock(); } #ifndef NDEBUG WrapperObserver.removeObserver(&Verifier); @@ -1853,6 +2355,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { finishPendingPhis(); + SwiftError.propagateVRegs(); + // Merge the argument lowering and constants block with its single // successor, the LLVM-IR entry block. We want the basic block to // be maximal. diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp index c83c791327e4..70694fe6b6c8 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/GlobalISel/InstructionSelect.cpp - InstructionSelect ---==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -50,9 +49,7 @@ INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) -InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { - initializeInstructionSelectPass(*PassRegistry::getPassRegistry()); -} +InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { } void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); @@ -90,10 +87,10 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { "instruction is not legal", *MI); return false; } -#endif // FIXME: We could introduce new blocks and will need to fix the outer loop. // Until then, keep track of the number of blocks to assert that we don't. const size_t NumBlocks = MF.size(); +#endif for (MachineBasicBlock *MBB : post_order(&MF)) { if (MBB->empty()) @@ -145,8 +142,6 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { } } - const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - for (MachineBasicBlock &MBB : MF) { if (MBB.empty()) continue; @@ -178,6 +173,8 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { } } +#ifndef NDEBUG + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // Now that selection is complete, there are no more generic vregs. Verify // that the size of the now-constrained vreg is unchanged and that it has a // register class. @@ -216,7 +213,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { reportGISelFailure(MF, TPC, MORE, R); return false; } - +#endif auto &TLI = *MF.getSubtarget().getTargetLowering(); TLI.finalizeLowering(MF); diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 38913e4afcba..2ad35b3a72c9 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -42,16 +41,16 @@ bool InstructionSelector::constrainOperandRegToRegClass( MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - return - constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC); + return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, RC, + I.getOperand(OpIdx), OpIdx); } bool InstructionSelector::isOperandImmEqual( const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const { if (MO.isReg() && MO.getReg()) - if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI)) - return *VRegVal == Value; + if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI)) + return VRegVal->Value == Value; return false; } @@ -79,6 +78,6 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI, std::next(MI.getIterator()) == IntoMI.getIterator()) return true; - return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() && - empty(MI.implicit_operands()); + return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() && + !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands()); } diff --git a/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 94eab9ae00c8..601d50e9806f 100644 --- a/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -1,9 +1,8 @@ //===- lib/CodeGen/GlobalISel/LegalizerPredicates.cpp - Predicates --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -39,15 +38,19 @@ LegalityPredicate LegalityPredicates::typePairInSet( }; } -LegalityPredicate LegalityPredicates::typePairAndMemSizeInSet( +LegalityPredicate LegalityPredicates::typePairAndMemDescInSet( unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx, - std::initializer_list<TypePairAndMemSize> TypesAndMemSizeInit) { - SmallVector<TypePairAndMemSize, 4> TypesAndMemSize = TypesAndMemSizeInit; + std::initializer_list<TypePairAndMemDesc> TypesAndMemDescInit) { + SmallVector<TypePairAndMemDesc, 4> TypesAndMemDesc = TypesAndMemDescInit; return [=](const LegalityQuery &Query) { - TypePairAndMemSize Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1], - Query.MMODescrs[MMOIdx].SizeInBits}; - return std::find(TypesAndMemSize.begin(), TypesAndMemSize.end(), Match) != - TypesAndMemSize.end(); + TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1], + Query.MMODescrs[MMOIdx].SizeInBits, + Query.MMODescrs[MMOIdx].AlignInBits}; + return std::find_if( + TypesAndMemDesc.begin(), TypesAndMemDesc.end(), + [=](const TypePairAndMemDesc &Entry) ->bool { + return Match.isCompatible(Entry); + }) != TypesAndMemDesc.end(); }; } @@ -57,10 +60,30 @@ LegalityPredicate LegalityPredicates::isScalar(unsigned TypeIdx) { }; } +LegalityPredicate LegalityPredicates::isVector(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].isVector(); + }; +} + +LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].isPointer(); + }; +} + +LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx, + unsigned AddrSpace) { + return [=](const LegalityQuery &Query) { + LLT Ty = Query.Types[TypeIdx]; + return Ty.isPointer() && Ty.getAddressSpace() == AddrSpace; + }; +} + LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx, unsigned Size) { return [=](const LegalityQuery &Query) { - const LLT &QueryTy = Query.Types[TypeIdx]; + const LLT QueryTy = Query.Types[TypeIdx]; return QueryTy.isScalar() && QueryTy.getSizeInBits() < Size; }; } @@ -68,18 +91,49 @@ LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx, LegalityPredicate LegalityPredicates::widerThan(unsigned TypeIdx, unsigned Size) { return [=](const LegalityQuery &Query) { - const LLT &QueryTy = Query.Types[TypeIdx]; + const LLT QueryTy = Query.Types[TypeIdx]; return QueryTy.isScalar() && QueryTy.getSizeInBits() > Size; }; } +LegalityPredicate LegalityPredicates::scalarOrEltNarrowerThan(unsigned TypeIdx, + unsigned Size) { + return [=](const LegalityQuery &Query) { + const LLT QueryTy = Query.Types[TypeIdx]; + return QueryTy.getScalarSizeInBits() < Size; + }; +} + +LegalityPredicate LegalityPredicates::scalarOrEltWiderThan(unsigned TypeIdx, + unsigned Size) { + return [=](const LegalityQuery &Query) { + const LLT QueryTy = Query.Types[TypeIdx]; + return QueryTy.getScalarSizeInBits() > Size; + }; +} + +LegalityPredicate LegalityPredicates::scalarOrEltSizeNotPow2(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT QueryTy = Query.Types[TypeIdx]; + return !isPowerOf2_32(QueryTy.getScalarSizeInBits()); + }; +} + LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { - const LLT &QueryTy = Query.Types[TypeIdx]; + const LLT QueryTy = Query.Types[TypeIdx]; return QueryTy.isScalar() 
&& !isPowerOf2_32(QueryTy.getSizeInBits()); }; } +LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0, + unsigned TypeIdx1) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx0].getSizeInBits() == + Query.Types[TypeIdx1].getSizeInBits(); + }; +} + LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) { return [=](const LegalityQuery &Query) { return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8); @@ -88,8 +142,8 @@ LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) { LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { - const LLT &QueryTy = Query.Types[TypeIdx]; - return QueryTy.isVector() && isPowerOf2_32(QueryTy.getNumElements()); + const LLT QueryTy = Query.Types[TypeIdx]; + return QueryTy.isVector() && !isPowerOf2_32(QueryTy.getNumElements()); }; } diff --git a/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/lib/CodeGen/GlobalISel/LegalizeMutations.cpp index a29b32ecdc03..fcbecf90a845 100644 --- a/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ b/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -1,9 +1,8 @@ //===- lib/CodeGen/GlobalISel/LegalizerMutations.cpp - Mutations ----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -27,25 +26,46 @@ LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx, }; } -LegalizeMutation LegalizeMutations::widenScalarToNextPow2(unsigned TypeIdx, - unsigned Min) { +LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx, + unsigned FromTypeIdx) { return [=](const LegalityQuery &Query) { - unsigned NewSizeInBits = - 1 << Log2_32_Ceil(Query.Types[TypeIdx].getSizeInBits()); - if (NewSizeInBits < Min) - NewSizeInBits = Min; - return std::make_pair(TypeIdx, LLT::scalar(NewSizeInBits)); + const LLT OldTy = Query.Types[TypeIdx]; + const LLT NewTy = Query.Types[FromTypeIdx]; + return std::make_pair(TypeIdx, OldTy.changeElementType(NewTy)); + }; +} + +LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx, + LLT NewEltTy) { + return [=](const LegalityQuery &Query) { + const LLT OldTy = Query.Types[TypeIdx]; + return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy)); + }; +} + +LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx, + unsigned Min) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + unsigned NewEltSizeInBits = + std::max(1u << Log2_32_Ceil(Ty.getScalarSizeInBits()), Min); + return std::make_pair(TypeIdx, Ty.changeElementSize(NewEltSizeInBits)); }; } LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx, unsigned Min) { return [=](const LegalityQuery &Query) { - const LLT &VecTy = Query.Types[TypeIdx]; - unsigned NewNumElements = 1 << Log2_32_Ceil(VecTy.getNumElements()); - if (NewNumElements < Min) - NewNumElements = Min; - return std::make_pair( - TypeIdx, LLT::vector(NewNumElements, VecTy.getScalarSizeInBits())); + const LLT VecTy = Query.Types[TypeIdx]; + unsigned NewNumElements = + std::max(1u << Log2_32_Ceil(VecTy.getNumElements()), Min); + return 
std::make_pair(TypeIdx, + LLT::vector(NewNumElements, VecTy.getElementType())); + }; +} + +LegalizeMutation LegalizeMutations::scalarize(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + return std::make_pair(TypeIdx, Query.Types[TypeIdx].getElementType()); }; } diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index 84131e59948c..b5b26bff34bb 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/GlobalISel/Legalizer.cpp -----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -28,6 +27,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetMachine.h" #include <iterator> @@ -50,9 +50,7 @@ INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE, "Legalize the Machine IR a function's Machine IR", false, false) -Legalizer::Legalizer() : MachineFunctionPass(ID) { - initializeLegalizerPass(*PassRegistry::getPassRegistry()); -} +Legalizer::Legalizer() : MachineFunctionPass(ID) { } void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); @@ -77,6 +75,7 @@ static bool isArtifact(const MachineInstr &MI) { case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_CONCAT_VECTORS: case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_EXTRACT: return true; } } @@ -87,12 +86,15 @@ namespace { class LegalizerWorkListManager : public GISelChangeObserver { InstListTy &InstList; ArtifactListTy &ArtifactList; +#ifndef NDEBUG + SmallVector<MachineInstr *, 4> NewMIs; +#endif public: LegalizerWorkListManager(InstListTy &Insts, ArtifactListTy &Arts) : InstList(Insts), ArtifactList(Arts) {} - void createdInstr(MachineInstr &MI) override { + void createdOrChangedInstr(MachineInstr &MI) { // Only legalize pre-isel generic instructions. // Legalization process could generate Target specific pseudo // instructions with generic types. Don't record them @@ -102,7 +104,20 @@ public: else InstList.insert(&MI); } + } + + void createdInstr(MachineInstr &MI) override { LLVM_DEBUG(dbgs() << ".. .. New MI: " << MI); + LLVM_DEBUG(NewMIs.push_back(&MI)); + createdOrChangedInstr(MI); + } + + void printNewInstrs() { + LLVM_DEBUG({ + for (const auto *MI : NewMIs) + dbgs() << ".. .. New MI: " << *MI; + NewMIs.clear(); + }); } void erasingInstr(MachineInstr &MI) override { @@ -119,7 +134,7 @@ public: // When insts change, we want to revisit them to legalize them again. // We'll consider them the same as created. LLVM_DEBUG(dbgs() << ".. .. 
Changed MI: " << MI); - createdInstr(MI); + createdOrChangedInstr(MI); } }; } // namespace @@ -155,20 +170,22 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { if (!isPreISelGenericOpcode(MI.getOpcode())) continue; if (isArtifact(MI)) - ArtifactList.insert(&MI); + ArtifactList.deferred_insert(&MI); else - InstList.insert(&MI); + InstList.deferred_insert(&MI); } } + ArtifactList.finalize(); + InstList.finalize(); std::unique_ptr<MachineIRBuilder> MIRBuilder; GISelCSEInfo *CSEInfo = nullptr; - bool IsO0 = TPC.getOptLevel() == CodeGenOpt::Level::None; - // Disable CSE for O0. - bool EnableCSE = !IsO0 && EnableCSEInLegalizer; + bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences() + ? EnableCSEInLegalizer + : TPC.isGISelCSEEnabled(); + if (EnableCSE) { MIRBuilder = make_unique<CSEMIRBuilder>(); - std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>(); - CSEInfo = &Wrapper.get(std::move(Config)); + CSEInfo = &Wrapper.get(TPC.getCSEConfig()); MIRBuilder->setCSEInfo(CSEInfo); } else MIRBuilder = make_unique<MachineIRBuilder>(); @@ -210,6 +227,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { "unable to legalize instruction", MI); return false; } + WorkListObserver.printNewInstrs(); Changed |= Res == LegalizerHelper::Legalized; } while (!ArtifactList.empty()) { @@ -222,7 +240,9 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { continue; } SmallVector<MachineInstr *, 4> DeadInstructions; - if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) { + if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions, + WrapperObserver)) { + WorkListObserver.printNewInstrs(); for (auto *DeadMI : DeadInstructions) { LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n"); RemoveDeadInstFromLists(DeadMI); diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index b3fc94cdec60..f5cf7fc9bd9b 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -30,6 +29,39 @@ using namespace llvm; using namespace LegalizeActions; +/// Try to break down \p OrigTy into \p NarrowTy sized pieces. +/// +/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy, +/// with any leftover piece as type \p LeftoverTy +/// +/// Returns -1 in the first element of the pair if the breakdown is not +/// satisfiable. 
+static std::pair<int, int> +getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) { + assert(!LeftoverTy.isValid() && "this is an out argument"); + + unsigned Size = OrigTy.getSizeInBits(); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + unsigned NumParts = Size / NarrowSize; + unsigned LeftoverSize = Size - NumParts * NarrowSize; + assert(Size > NarrowSize); + + if (LeftoverSize == 0) + return {NumParts, 0}; + + if (NarrowTy.isVector()) { + unsigned EltSize = OrigTy.getScalarSizeInBits(); + if (LeftoverSize % EltSize != 0) + return {-1, -1}; + LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); + } else { + LeftoverTy = LLT::scalar(LeftoverSize); + } + + int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits(); + return std::make_pair(NumParts, NumLeftover); +} + LegalizerHelper::LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &Builder) @@ -50,6 +82,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); + if (MI.getOpcode() == TargetOpcode::G_INTRINSIC || + MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) + return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized + : UnableToLegalize; auto Step = LI.getAction(MI, MRI); switch (Step.Action) { case Legal: @@ -70,6 +106,9 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { case FewerElements: LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n"); return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); + case MoreElements: + LLVM_DEBUG(dbgs() << ".. Increase number of elements\n"); + return moreElementsVector(MI, Step.TypeIdx, Step.NewType); case Custom: LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized @@ -80,13 +119,103 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { } } -void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, - SmallVectorImpl<unsigned> &VRegs) { +void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts, + SmallVectorImpl<Register> &VRegs) { for (int i = 0; i < NumParts; ++i) VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); MIRBuilder.buildUnmerge(VRegs, Reg); } +bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, + LLT MainTy, LLT &LeftoverTy, + SmallVectorImpl<Register> &VRegs, + SmallVectorImpl<Register> &LeftoverRegs) { + assert(!LeftoverTy.isValid() && "this is an out argument"); + + unsigned RegSize = RegTy.getSizeInBits(); + unsigned MainSize = MainTy.getSizeInBits(); + unsigned NumParts = RegSize / MainSize; + unsigned LeftoverSize = RegSize - NumParts * MainSize; + + // Use an unmerge when possible. + if (LeftoverSize == 0) { + for (unsigned I = 0; I < NumParts; ++I) + VRegs.push_back(MRI.createGenericVirtualRegister(MainTy)); + MIRBuilder.buildUnmerge(VRegs, Reg); + return true; + } + + if (MainTy.isVector()) { + unsigned EltSize = MainTy.getScalarSizeInBits(); + if (LeftoverSize % EltSize != 0) + return false; + LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); + } else { + LeftoverTy = LLT::scalar(LeftoverSize); + } + + // For irregular sizes, extract the individual parts. 
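
To make the breakdown arithmetic of getNarrowTypeBreakDown concrete, two cases worked by hand against the code above (unit-test style; both satisfy the Size > NarrowSize precondition):

    // s96 split by s32: three full parts, no leftover, LeftoverTy stays unset.
    LLT Leftover;
    assert(getNarrowTypeBreakDown(LLT::scalar(96), LLT::scalar(32), Leftover) ==
           std::make_pair(3, 0));

    // v3s32 split by v2s32: one full part plus a 32-bit leftover; note that
    // LLT::scalarOrVector(1, 32) collapses the leftover to a plain s32.
    LLT Leftover2;
    assert(getNarrowTypeBreakDown(LLT::vector(3, 32), LLT::vector(2, 32),
                                  Leftover2) == std::make_pair(1, 1) &&
           Leftover2 == LLT::scalar(32));
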
+ for (unsigned I = 0; I != NumParts; ++I) { + Register NewReg = MRI.createGenericVirtualRegister(MainTy); + VRegs.push_back(NewReg); + MIRBuilder.buildExtract(NewReg, Reg, MainSize * I); + } + + for (unsigned Offset = MainSize * NumParts; Offset < RegSize; + Offset += LeftoverSize) { + Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy); + LeftoverRegs.push_back(NewReg); + MIRBuilder.buildExtract(NewReg, Reg, Offset); + } + + return true; +} + +void LegalizerHelper::insertParts(Register DstReg, + LLT ResultTy, LLT PartTy, + ArrayRef<Register> PartRegs, + LLT LeftoverTy, + ArrayRef<Register> LeftoverRegs) { + if (!LeftoverTy.isValid()) { + assert(LeftoverRegs.empty()); + + if (!ResultTy.isVector()) { + MIRBuilder.buildMerge(DstReg, PartRegs); + return; + } + + if (PartTy.isVector()) + MIRBuilder.buildConcatVectors(DstReg, PartRegs); + else + MIRBuilder.buildBuildVector(DstReg, PartRegs); + return; + } + + unsigned PartSize = PartTy.getSizeInBits(); + unsigned LeftoverPartSize = LeftoverTy.getSizeInBits(); + + Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy); + MIRBuilder.buildUndef(CurResultReg); + + unsigned Offset = 0; + for (Register PartReg : PartRegs) { + Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy); + MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset); + CurResultReg = NewResultReg; + Offset += PartSize; + } + + for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) { + // Use the original output register for the final insert to avoid a copy. + Register NewResultReg = (I + 1 == E) ? + DstReg : MRI.createGenericVirtualRegister(ResultTy); + + MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset); + CurResultReg = NewResultReg; + Offset += LeftoverPartSize; + } +} + static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { switch (Opcode) { case TargetOpcode::G_SDIV: @@ -116,6 +245,12 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { case TargetOpcode::G_FDIV: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32; + case TargetOpcode::G_FEXP: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32; + case TargetOpcode::G_FEXP2: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32; case TargetOpcode::G_FREM: return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32; case TargetOpcode::G_FPOW: @@ -123,6 +258,32 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { case TargetOpcode::G_FMA: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32; + case TargetOpcode::G_FSIN: + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + return Size == 128 ? RTLIB::SIN_F128 + : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32; + case TargetOpcode::G_FCOS: + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + return Size == 128 ? RTLIB::COS_F128 + : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32; + case TargetOpcode::G_FLOG10: + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + return Size == 128 ? RTLIB::LOG10_F128 + : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32; + case TargetOpcode::G_FLOG: + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + return Size == 128 ? RTLIB::LOG_F128 + : Size == 64 ? 
RTLIB::LOG_F64 : RTLIB::LOG_F32; + case TargetOpcode::G_FLOG2: + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + return Size == 128 ? RTLIB::LOG2_F128 + : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32; + case TargetOpcode::G_FCEIL: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32; + case TargetOpcode::G_FFLOOR: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32; } llvm_unreachable("Unknown libcall function"); } @@ -214,7 +375,20 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_FDIV: case TargetOpcode::G_FMA: case TargetOpcode::G_FPOW: - case TargetOpcode::G_FREM: { + case TargetOpcode::G_FREM: + case TargetOpcode::G_FCOS: + case TargetOpcode::G_FSIN: + case TargetOpcode::G_FLOG10: + case TargetOpcode::G_FLOG: + case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FEXP: + case TargetOpcode::G_FEXP2: + case TargetOpcode::G_FCEIL: + case TargetOpcode::G_FFLOOR: { + if (Size > 64) { + LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n"); + return UnableToLegalize; + } Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); if (Status != Legalized) @@ -250,10 +424,11 @@ LegalizerHelper::libcall(MachineInstr &MI) { // FIXME: Support other types unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - if (ToSize != 32 || (FromSize != 32 && FromSize != 64)) + if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64)) return UnableToLegalize; LegalizeResult Status = conversionLibcall( - MI, MIRBuilder, Type::getInt32Ty(Ctx), + MI, MIRBuilder, + ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx), FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx)); if (Status != Legalized) return Status; @@ -264,12 +439,12 @@ LegalizerHelper::libcall(MachineInstr &MI) { // FIXME: Support other types unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - if (FromSize != 32 || (ToSize != 32 && ToSize != 64)) + if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64)) return UnableToLegalize; LegalizeResult Status = conversionLibcall( MI, MIRBuilder, ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), - Type::getInt32Ty(Ctx)); + FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx)); if (Status != Legalized) return Status; break; @@ -283,10 +458,6 @@ LegalizerHelper::libcall(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - // FIXME: Don't know how to handle secondary types yet. 
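
The libcall path above stays deliberately simple: a 32- or 64-bit scalar maps straight onto an RTLIB entry, and anything wider now fails fast with UnableToLegalize rather than asserting, which also means the F128 entries in getRTLibDesc are unreachable from this path until the Size > 64 guard is relaxed. A grounded example of the mapping:

    // 64-bit G_FSIN legalized via libcall: getRTLibDesc selects SIN_F64, and
    // simpleLibcall then emits a call to "sin" with double argument/result.
    RTLIB::Libcall LC = getRTLibDesc(TargetOpcode::G_FSIN, /*Size=*/64);
    assert(LC == RTLIB::SIN_F64);
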
- if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT) - return UnableToLegalize; - MIRBuilder.setInstr(MI); uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); @@ -302,12 +473,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return UnableToLegalize; int NumParts = SizeOp0 / NarrowSize; - SmallVector<unsigned, 2> DstRegs; + SmallVector<Register, 2> DstRegs; for (int i = 0; i < NumParts; ++i) DstRegs.push_back( MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg()); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if(MRI.getType(DstReg).isVector()) MIRBuilder.buildBuildVector(DstReg, DstRegs); else @@ -315,6 +486,38 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_CONSTANT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + const APInt &Val = MI.getOperand(1).getCImm()->getValue(); + unsigned TotalSize = Ty.getSizeInBits(); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + int NumParts = TotalSize / NarrowSize; + + SmallVector<Register, 4> PartRegs; + for (int I = 0; I != NumParts; ++I) { + unsigned Offset = I * NarrowSize; + auto K = MIRBuilder.buildConstant(NarrowTy, + Val.lshr(Offset).trunc(NarrowSize)); + PartRegs.push_back(K.getReg(0)); + } + + LLT LeftoverTy; + unsigned LeftoverBits = TotalSize - NumParts * NarrowSize; + SmallVector<Register, 1> LeftoverRegs; + if (LeftoverBits != 0) { + LeftoverTy = LLT::scalar(LeftoverBits); + auto K = MIRBuilder.buildConstant( + LeftoverTy, + Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits)); + LeftoverRegs.push_back(K.getReg(0)); + } + + insertParts(MI.getOperand(0).getReg(), + Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs); + + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_ADD: { // FIXME: add support for when SizeOp0 isn't an exact multiple of // NarrowSize. @@ -323,16 +526,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // Expand in terms of carry-setting/consuming G_ADDE instructions. 
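
The carry chain this case builds is easiest to read as the resulting generic MIR; for an s64 G_ADD narrowed to s32 the expansion below produces roughly:

    // %a0:_(s32), %a1:_(s32) = G_UNMERGE_VALUES %a(s64)
    // %b0:_(s32), %b1:_(s32) = G_UNMERGE_VALUES %b(s64)
    // %zero:_(s1) = G_CONSTANT i1 0
    // %lo:_(s32), %c0:_(s1) = G_UADDE %a0, %b0, %zero   // low part, carry out
    // %hi:_(s32), %c1:_(s1) = G_UADDE %a1, %b1, %c0     // carry rippled in
    // %sum:_(s64) = G_MERGE_VALUES %lo(s32), %hi(s32)
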
int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); - SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; + SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); - unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); + Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); MIRBuilder.buildConstant(CarryIn, 0); for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); + Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], Src2Regs[i], CarryIn); @@ -340,67 +543,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, DstRegs.push_back(DstReg); CarryIn = CarryOut; } - unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); - else - MIRBuilder.buildMerge(DstReg, DstRegs); - MI.eraseFromParent(); - return Legalized; - } - case TargetOpcode::G_EXTRACT: { - if (TypeIdx != 1) - return UnableToLegalize; - - int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); - // FIXME: add support for when SizeOp1 isn't an exact multiple of - // NarrowSize. - if (SizeOp1 % NarrowSize != 0) - return UnableToLegalize; - int NumParts = SizeOp1 / NarrowSize; - - SmallVector<unsigned, 2> SrcRegs, DstRegs; - SmallVector<uint64_t, 2> Indexes; - extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); - - unsigned OpReg = MI.getOperand(0).getReg(); - uint64_t OpStart = MI.getOperand(2).getImm(); - uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); - for (int i = 0; i < NumParts; ++i) { - unsigned SrcStart = i * NarrowSize; - - if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) { - // No part of the extract uses this subregister, ignore it. - continue; - } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) { - // The entire subregister is extracted, forward the value. - DstRegs.push_back(SrcRegs[i]); - continue; - } - - // OpSegStart is where this destination segment would start in OpReg if it - // extended infinitely in both directions. - int64_t ExtractOffset; - uint64_t SegSize; - if (OpStart < SrcStart) { - ExtractOffset = 0; - SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); - } else { - ExtractOffset = OpStart - SrcStart; - SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize); - } - - unsigned SegReg = SrcRegs[i]; - if (ExtractOffset != 0 || SegSize != NarrowSize) { - // A genuine extract is needed. - SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); - MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); - } - - DstRegs.push_back(SegReg); - } - - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if(MRI.getType(DstReg).isVector()) MIRBuilder.buildBuildVector(DstReg, DstRegs); else @@ -408,178 +551,117 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_INSERT: { + case TargetOpcode::G_SUB: { // FIXME: add support for when SizeOp0 isn't an exact multiple of // NarrowSize. 
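
The G_SUB case that follows uses the same recipe but seeds the chain with G_USUBO, since the lowest part has no incoming borrow; later parts consume it through G_USUBE:

    // %lo:_(s32), %br0:_(s1) = G_USUBO %a0, %b0         // no borrow in
    // %hi:_(s32), %br1:_(s1) = G_USUBE %a1, %b1, %br0   // borrow rippled in
    // %dif:_(s64) = G_MERGE_VALUES %lo(s32), %hi(s32)
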
if (SizeOp0 % NarrowSize != 0) return UnableToLegalize; - int NumParts = SizeOp0 / NarrowSize; - - SmallVector<unsigned, 2> SrcRegs, DstRegs; - SmallVector<uint64_t, 2> Indexes; - extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); - unsigned OpReg = MI.getOperand(2).getReg(); - uint64_t OpStart = MI.getOperand(3).getImm(); - uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); - for (int i = 0; i < NumParts; ++i) { - unsigned DstStart = i * NarrowSize; - - if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) { - // No part of the insert affects this subregister, forward the original. - DstRegs.push_back(SrcRegs[i]); - continue; - } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) { - // The entire subregister is defined by this insert, forward the new - // value. - DstRegs.push_back(OpReg); - continue; - } + SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); + extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); - // OpSegStart is where this destination segment would start in OpReg if it - // extended infinitely in both directions. - int64_t ExtractOffset, InsertOffset; - uint64_t SegSize; - if (OpStart < DstStart) { - InsertOffset = 0; - ExtractOffset = DstStart - OpStart; - SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart); - } else { - InsertOffset = OpStart - DstStart; - ExtractOffset = 0; - SegSize = - std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart); - } + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); + Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); + MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut}, + {Src1Regs[0], Src2Regs[0]}); + DstRegs.push_back(DstReg); + Register BorrowIn = BorrowOut; + for (int i = 1; i < NumParts; ++i) { + DstReg = MRI.createGenericVirtualRegister(NarrowTy); + BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); - unsigned SegReg = OpReg; - if (ExtractOffset != 0 || SegSize != OpSize) { - // A genuine extract is needed. - SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); - MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset); - } + MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut}, + {Src1Regs[i], Src2Regs[i], BorrowIn}); - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset); DstRegs.push_back(DstReg); + BorrowIn = BorrowOut; } - - assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered"); - unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); - else - MIRBuilder.buildMerge(DstReg, DstRegs); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_MUL: + case TargetOpcode::G_UMULH: + return narrowScalarMul(MI, NarrowTy); + case TargetOpcode::G_EXTRACT: + return narrowScalarExtract(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_INSERT: + return narrowScalarInsert(MI, TypeIdx, NarrowTy); case TargetOpcode::G_LOAD: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - const auto &MMO = **MI.memoperands_begin(); - // This implementation doesn't work for atomics. Give up instead of doing - // something invalid. 
- if (MMO.getOrdering() != AtomicOrdering::NotAtomic || - MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (DstTy.isVector()) return UnableToLegalize; - int NumParts = SizeOp0 / NarrowSize; - LLT OffsetTy = LLT::scalar( - MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); - - SmallVector<unsigned, 2> DstRegs; - for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - unsigned SrcReg = 0; - unsigned Adjustment = i * NarrowSize / 8; - unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment); - - MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( - MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), - NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(), - MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering()); - - MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy, - Adjustment); + if (8 * MMO.getSize() != DstTy.getSizeInBits()) { + Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); + auto &MMO = **MI.memoperands_begin(); + MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO); + MIRBuilder.buildAnyExt(DstReg, TmpReg); + MI.eraseFromParent(); + return Legalized; + } - MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO); + return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); + } + case TargetOpcode::G_ZEXTLOAD: + case TargetOpcode::G_SEXTLOAD: { + bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD; + Register DstReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); - DstRegs.push_back(DstReg); + Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); + auto &MMO = **MI.memoperands_begin(); + if (MMO.getSizeInBits() == NarrowSize) { + MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); + } else { + unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD + : TargetOpcode::G_SEXTLOAD; + MIRBuilder.buildInstr(ExtLoad) + .addDef(TmpReg) + .addUse(PtrReg) + .addMemOperand(&MMO); } - unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); + + if (ZExt) + MIRBuilder.buildZExt(DstReg, TmpReg); else - MIRBuilder.buildMerge(DstReg, DstRegs); + MIRBuilder.buildSExt(DstReg, TmpReg); + MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_STORE: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - const auto &MMO = **MI.memoperands_begin(); - // This implementation doesn't work for atomics. Give up instead of doing - // something invalid. 
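
The G_LOAD handling above no longer splits unconditionally: when the memory access is already narrower than the wide result, it loads at NarrowTy and extends, and only full-width accesses go through reduceLoadStoreWidth. For an s64 result backed by a 4-byte access:

    // %tmp:_(s32) = G_LOAD %ptr(p0) :: (load 4)
    // %val:_(s64) = G_ANYEXT %tmp(s32)
    // G_ZEXTLOAD/G_SEXTLOAD narrow the same way but finish with G_ZEXT/G_SEXT
    // so their extension semantics survive the split.
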
- if (MMO.getOrdering() != AtomicOrdering::NotAtomic || - MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) + + Register SrcReg = MI.getOperand(0).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.isVector()) return UnableToLegalize; int NumParts = SizeOp0 / NarrowSize; - LLT OffsetTy = LLT::scalar( - MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); - - SmallVector<unsigned, 2> SrcRegs; - extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs); - - for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = 0; - unsigned Adjustment = i * NarrowSize / 8; - unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment); - - MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( - MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), - NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(), - MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering()); - - MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy, - Adjustment); - - MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO); - } - MI.eraseFromParent(); - return Legalized; - } - case TargetOpcode::G_CONSTANT: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) + unsigned HandledSize = NumParts * NarrowTy.getSizeInBits(); + unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize; + if (SrcTy.isVector() && LeftoverBits != 0) return UnableToLegalize; - int NumParts = SizeOp0 / NarrowSize; - const APInt &Cst = MI.getOperand(1).getCImm()->getValue(); - LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); - SmallVector<unsigned, 2> DstRegs; - for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - ConstantInt *CI = - ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize)); - MIRBuilder.buildConstant(DstReg, *CI); - DstRegs.push_back(DstReg); + if (8 * MMO.getSize() != SrcTy.getSizeInBits()) { + Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); + auto &MMO = **MI.memoperands_begin(); + MIRBuilder.buildTrunc(TmpReg, SrcReg); + MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO); + MI.eraseFromParent(); + return Legalized; } - unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); - else - MIRBuilder.buildMerge(DstReg, DstRegs); - MI.eraseFromParent(); - return Legalized; + + return reduceLoadStoreWidth(MI, 0, NarrowTy); } + case TargetOpcode::G_SELECT: + return narrowScalarSelect(MI, TypeIdx, NarrowTy); case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: { @@ -592,44 +674,112 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // ... // AN = BinOp<Ty/N> BN, CN // A = G_MERGE_VALUES A1, ..., AN + return narrowScalarBasic(MI, TypeIdx, NarrowTy); + } + case TargetOpcode::G_SHL: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_ASHR: + return narrowScalarShift(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_CTLZ: + case TargetOpcode::G_CTLZ_ZERO_UNDEF: + case TargetOpcode::G_CTTZ: + case TargetOpcode::G_CTTZ_ZERO_UNDEF: + case TargetOpcode::G_CTPOP: + if (TypeIdx != 0) + return UnableToLegalize; // TODO - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. 
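// The bitwise cases above defer to narrowScalarBasic, which applies the
// operation to each pair of parts independently. A standalone sketch,
// assuming a 64-bit G_AND done on two 32-bit parts (narrowAnd64 is an
// illustrative name):
#include <cstdint>

static uint64_t narrowAnd64(uint64_t A, uint64_t B) {
  uint32_t Lo = uint32_t(A) & uint32_t(B);             // BinOp on part 0
  uint32_t Hi = uint32_t(A >> 32) & uint32_t(B >> 32); // BinOp on part 1
  return (uint64_t(Hi) << 32) | Lo;                    // G_MERGE_VALUES
}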
- if (SizeOp0 % NarrowSize != 0) + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_INTTOPTR: + if (TypeIdx != 1) return UnableToLegalize; - int NumParts = SizeOp0 / NarrowSize; - // List the registers where the destination will be scattered. - SmallVector<unsigned, 2> DstRegs; - // List the registers where the first argument will be split. - SmallVector<unsigned, 2> SrcsReg1; - // List the registers where the second argument will be split. - SmallVector<unsigned, 2> SrcsReg2; - // Create all the temporary registers. - for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy); - unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy); + Observer.changingInstr(MI); + narrowScalarSrc(MI, NarrowTy, 1); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_PTRTOINT: + if (TypeIdx != 0) + return UnableToLegalize; - DstRegs.push_back(DstReg); - SrcsReg1.push_back(SrcReg1); - SrcsReg2.push_back(SrcReg2); + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_PHI: { + unsigned NumParts = SizeOp0 / NarrowSize; + SmallVector<Register, 2> DstRegs; + SmallVector<SmallVector<Register, 2>, 2> SrcRegs; + DstRegs.resize(NumParts); + SrcRegs.resize(MI.getNumOperands() / 2); + Observer.changingInstr(MI); + for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { + MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB(); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts, + SrcRegs[i / 2]); } - // Explode the big arguments into smaller chunks. - MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg()); - MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg()); + MachineBasicBlock &MBB = *MI.getParent(); + MIRBuilder.setInsertPt(MBB, MI); + for (unsigned i = 0; i < NumParts; ++i) { + DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy); + MachineInstrBuilder MIB = + MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]); + for (unsigned j = 1; j < MI.getNumOperands(); j += 2) + MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1)); + } + MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + Observer.changedInstr(MI); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + case TargetOpcode::G_INSERT_VECTOR_ELT: { + if (TypeIdx != 2) + return UnableToLegalize; - // Do the operation on each small part. - for (int i = 0; i < NumParts; ++i) - MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]}, - {SrcsReg1[i], SrcsReg2[i]}); + int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3; + Observer.changingInstr(MI); + narrowScalarSrc(MI, NarrowTy, OpIdx); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_ICMP: { + uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); + if (NarrowSize * 2 != SrcSize) + return UnableToLegalize; - // Gather the destination registers into the final destination. 
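// A standalone model of the double-width G_ICMP decomposition built just
// below, assuming a 64-bit compare on 32-bit halves (both helpers are
// illustrative): EQ/NE reduces to XOR/OR against zero, while ordered
// predicates select between the high-half and low-half comparisons.
#include <cstdint>

static bool icmpEq64(uint64_t L, uint64_t R) {
  uint32_t XorL = uint32_t(L) ^ uint32_t(R);             // G_XOR, low halves
  uint32_t XorH = uint32_t(L >> 32) ^ uint32_t(R >> 32); // G_XOR, high halves
  return (XorL | XorH) == 0;                             // G_OR, icmp eq 0
}

static bool icmpSlt64(uint64_t L, uint64_t R) {
  bool CmpH = int32_t(L >> 32) < int32_t(R >> 32);      // signed, high halves
  bool CmpHEQ = uint32_t(L >> 32) == uint32_t(R >> 32); // high halves equal?
  bool CmpLU = uint32_t(L) < uint32_t(R);               // unsigned, low halves
  return CmpHEQ ? CmpLU : CmpH;                         // G_SELECT
}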
- unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); - else - MIRBuilder.buildMerge(DstReg, DstRegs); + Observer.changingInstr(MI); + Register LHSL = MRI.createGenericVirtualRegister(NarrowTy); + Register LHSH = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg()); + + Register RHSL = MRI.createGenericVirtualRegister(NarrowTy); + Register RHSH = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg()); + + CmpInst::Predicate Pred = + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + + if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { + MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL); + MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH); + MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH); + MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0); + MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero); + } else { + const LLT s1 = LLT::scalar(1); + MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH); + MachineInstrBuilder CmpHEQ = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH); + MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( + ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL); + MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH); + } + Observer.changedInstr(MI); MI.eraseFromParent(); return Legalized; } @@ -643,15 +793,322 @@ void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy, MO.setReg(ExtB->getOperand(0).getReg()); } +void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, + unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy}, + {MO.getReg()}); + MO.setReg(ExtB->getOperand(0).getReg()); +} + void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned TruncOpcode) { MachineOperand &MO = MI.getOperand(OpIdx); - unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + Register DstExt = MRI.createGenericVirtualRegister(WideTy); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt}); MO.setReg(DstExt); } +void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy, + unsigned OpIdx, unsigned ExtOpcode) { + MachineOperand &MO = MI.getOperand(OpIdx); + Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc}); + MO.setReg(DstTrunc); +} + +void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, + unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + Register DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildExtract(MO.getReg(), DstExt, 0); + MO.setReg(DstExt); +} + +void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, + unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + + LLT OldTy = MRI.getType(MO.getReg()); + unsigned OldElts = OldTy.getNumElements(); + unsigned NewElts = MoreTy.getNumElements(); + + unsigned NumParts = NewElts / OldElts; + + // Use concat_vectors if the result is a multiple of the number of 
elements. + if (NumParts * OldElts == NewElts) { + SmallVector<Register, 8> Parts; + Parts.push_back(MO.getReg()); + + Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0); + for (unsigned I = 1; I != NumParts; ++I) + Parts.push_back(ImpDef); + + auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts); + MO.setReg(Concat.getReg(0)); + return; + } + + Register MoreReg = MRI.createGenericVirtualRegister(MoreTy); + Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0); + MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0); + MO.setReg(MoreReg); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (DstTy.isVector()) + return UnableToLegalize; + + Register Src1 = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src1); + const int DstSize = DstTy.getSizeInBits(); + const int SrcSize = SrcTy.getSizeInBits(); + const int WideSize = WideTy.getSizeInBits(); + const int NumMerge = (DstSize + WideSize - 1) / WideSize; + + unsigned NumOps = MI.getNumOperands(); + unsigned NumSrc = MI.getNumOperands() - 1; + unsigned PartSize = DstTy.getSizeInBits() / NumSrc; + + if (WideSize >= DstSize) { + // Directly pack the bits in the target type. + Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0); + + for (unsigned I = 2; I != NumOps; ++I) { + const unsigned Offset = (I - 1) * PartSize; + + Register SrcReg = MI.getOperand(I).getReg(); + assert(MRI.getType(SrcReg) == LLT::scalar(PartSize)); + + auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg); + + Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg : + MRI.createGenericVirtualRegister(WideTy); + + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset); + auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt); + MIRBuilder.buildOr(NextResult, ResultReg, Shl); + ResultReg = NextResult; + } + + if (WideSize > DstSize) + MIRBuilder.buildTrunc(DstReg, ResultReg); + + MI.eraseFromParent(); + return Legalized; + } + + // Unmerge the original values to the GCD type, and recombine to the next + // multiple greater than the original type. + // + // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6 + // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0 + // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1 + // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2 + // %10:_(s6) = G_MERGE_VALUES %4, %5, %6 + // %11:_(s6) = G_MERGE_VALUES %7, %8, %9 + // %12:_(s12) = G_MERGE_VALUES %10, %11 + // + // Padding with undef if necessary: + // + // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6 + // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0 + // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1 + // %7:_(s2) = G_IMPLICIT_DEF + // %8:_(s6) = G_MERGE_VALUES %3, %4, %5 + // %9:_(s6) = G_MERGE_VALUES %6, %7, %7 + // %10:_(s12) = G_MERGE_VALUES %8, %9 + + const int GCD = greatestCommonDivisor(SrcSize, WideSize); + LLT GCDTy = LLT::scalar(GCD); + + SmallVector<Register, 8> Parts; + SmallVector<Register, 8> NewMergeRegs; + SmallVector<Register, 8> Unmerges; + LLT WideDstTy = LLT::scalar(NumMerge * WideSize); + + // Decompose the original operands if they don't evenly divide. 
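// The WideSize >= DstSize path above packs the sources with zext/shl/or. A
// standalone sketch, assuming four 8-bit sources merged into a 32-bit result
// (widenMerge4x8 is an illustrative name):
#include <cstdint>

static uint32_t widenMerge4x8(const uint8_t Parts[4]) {
  uint32_t Result = Parts[0];  // G_ZEXT of the first source
  for (unsigned I = 1; I != 4; ++I) {
    uint32_t Zext = Parts[I];  // G_ZEXT of source I
    Result |= Zext << (I * 8); // G_SHL by the part offset, then G_OR
  }
  return Result;
}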
+ for (int I = 1, E = MI.getNumOperands(); I != E; ++I) { + Register SrcReg = MI.getOperand(I).getReg(); + if (GCD == SrcSize) { + Unmerges.push_back(SrcReg); + } else { + auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J) + Unmerges.push_back(Unmerge.getReg(J)); + } + } + + // Pad with undef to the next size that is a multiple of the requested size. + if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) { + Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0); + for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I) + Unmerges.push_back(UndefReg); + } + + const int PartsPerGCD = WideSize / GCD; + + // Build merges of each piece. + ArrayRef<Register> Slicer(Unmerges); + for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) { + auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD)); + NewMergeRegs.push_back(Merge.getReg(0)); + } + + // A truncate may be necessary if the requested type doesn't evenly divide the + // original result type. + if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) { + MIRBuilder.buildMerge(DstReg, NewMergeRegs); + } else { + auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs); + MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0)); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + unsigned NumDst = MI.getNumOperands() - 1; + Register SrcReg = MI.getOperand(NumDst).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + if (!SrcTy.isScalar()) + return UnableToLegalize; + + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst0Reg); + if (!DstTy.isScalar()) + return UnableToLegalize; + + unsigned NewSrcSize = NumDst * WideTy.getSizeInBits(); + LLT NewSrcTy = LLT::scalar(NewSrcSize); + unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits(); + + auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg); + + for (unsigned I = 1; I != NumDst; ++I) { + auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I); + auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt); + WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl); + } + + Observer.changingInstr(MI); + + MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg()); + for (unsigned I = 0; I != NumDst; ++I) + widenScalarDst(MI, WideTy, I); + + Observer.changedInstr(MI); + + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + LLT DstTy = MRI.getType(DstReg); + unsigned Offset = MI.getOperand(2).getImm(); + + if (TypeIdx == 0) { + if (SrcTy.isVector() || DstTy.isVector()) + return UnableToLegalize; + + SrcOp Src(SrcReg); + if (SrcTy.isPointer()) { + // Extracts from pointers can be handled only if they are really just + // simple integers. + const DataLayout &DL = MIRBuilder.getDataLayout(); + if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) + return UnableToLegalize; + + LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits()); + Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src); + SrcTy = SrcAsIntTy; + } + + if (DstTy.isPointer()) + return UnableToLegalize; + + if (Offset == 0) { + // Avoid a shift in the degenerate case. 
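// A standalone model of the widened G_EXTRACT lowering around this point,
// assuming a 16-bit extract from a 32-bit source (widenExtract16 is an
// illustrative name): offset zero is a plain truncate, anything else is a
// right shift by the bit offset followed by a truncate.
#include <cstdint>

static uint16_t widenExtract16(uint32_t Src, unsigned Offset) {
  if (Offset == 0)
    return uint16_t(Src);         // degenerate case: G_TRUNC only
  return uint16_t(Src >> Offset); // G_LSHR by the offset, then G_TRUNC
}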
+ MIRBuilder.buildTrunc(DstReg, + MIRBuilder.buildAnyExtOrTrunc(WideTy, Src)); + MI.eraseFromParent(); + return Legalized; + } + + // Do a shift in the source type. + LLT ShiftTy = SrcTy; + if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { + Src = MIRBuilder.buildAnyExt(WideTy, Src); + ShiftTy = WideTy; + } else if (WideTy.getSizeInBits() < SrcTy.getSizeInBits()) + return UnableToLegalize; + + auto LShr = MIRBuilder.buildLShr( + ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset)); + MIRBuilder.buildTrunc(DstReg, LShr); + MI.eraseFromParent(); + return Legalized; + } + + if (SrcTy.isScalar()) { + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + Observer.changedInstr(MI); + return Legalized; + } + + if (!SrcTy.isVector()) + return UnableToLegalize; + + if (DstTy != SrcTy.getElementType()) + return UnableToLegalize; + + if (Offset % SrcTy.getScalarSizeInBits() != 0) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + + MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) * + Offset); + widenScalarDst(MI, WideTy.getScalarType(), 0); + Observer.changedInstr(MI); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + Observer.changedInstr(MI); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { MIRBuilder.setInstr(MI); @@ -659,6 +1116,14 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_EXTRACT: + return widenScalarExtract(MI, TypeIdx, WideTy); + case TargetOpcode::G_INSERT: + return widenScalarInsert(MI, TypeIdx, WideTy); + case TargetOpcode::G_MERGE_VALUES: + return widenScalarMergeValues(MI, TypeIdx, WideTy); + case TargetOpcode::G_UNMERGE_VALUES: + return widenScalarUnmergeValues(MI, TypeIdx, WideTy); case TargetOpcode::G_UADDO: case TargetOpcode::G_USUBO: { if (TypeIdx == 1) @@ -690,19 +1155,28 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTPOP: { + if (TypeIdx == 0) { + Observer.changingInstr(MI); + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + + Register SrcReg = MI.getOperand(1).getReg(); + // First ZEXT the input. - auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg()); - LLT CurTy = MRI.getType(MI.getOperand(0).getReg()); + auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg); + LLT CurTy = MRI.getType(SrcReg); if (MI.getOpcode() == TargetOpcode::G_CTTZ) { // The count is the same in the larger type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. auto TopBit = APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits()); - MIBSrc = MIRBuilder.buildInstr( - TargetOpcode::G_OR, {WideTy}, - {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())}); + MIBSrc = MIRBuilder.buildOr( + WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit)); } + // Perform the operation at the larger size.
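// A standalone check of the G_CTTZ widening trick above, assuming an 8-bit
// cttz evaluated at 32 bits (__builtin_ctz stands in for the widened
// G_CTTZ): OR-ing in the bit just past the original width makes a zero input
// report 8 rather than 32.
#include <cstdint>

static unsigned cttz8Via32(uint8_t X) {
  uint32_t Widened = uint32_t(X) | (1u << 8); // G_ZEXT, then G_OR the top bit
  return unsigned(__builtin_ctz(Widened));    // never exceeds 8, never sees 0
}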
auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc}); // This is already the correct result for CTPOP and CTTZs @@ -714,22 +1188,43 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { TargetOpcode::G_SUB, {WideTy}, {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)}); } - auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); - // Make the original instruction a trunc now, and update its source. + + MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_BSWAP: { Observer.changingInstr(MI); - MI.setDesc(TII.get(TargetOpcode::G_TRUNC)); - MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg()); + Register DstReg = MI.getOperand(0).getReg(); + + Register ShrReg = MRI.createGenericVirtualRegister(WideTy); + Register DstExt = MRI.createGenericVirtualRegister(WideTy); + Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + + MI.getOperand(0).setReg(DstExt); + + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + + LLT Ty = MRI.getType(DstReg); + unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits(); + MIRBuilder.buildConstant(ShiftAmtReg, DiffBits); + MIRBuilder.buildInstr(TargetOpcode::G_LSHR) + .addDef(ShrReg) + .addUse(DstExt) + .addUse(ShiftAmtReg); + + MIRBuilder.buildTrunc(DstReg, ShrReg); Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: case TargetOpcode::G_SUB: - // Perform operation at larger width (any extension is fine here, high bits // don't affect the result) and then truncate the result back to the // original type. Observer.changingInstr(MI); @@ -741,16 +1236,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_SHL: Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); - // The "number of bits to shift" operand must preserve its value as an - // unsigned integer: - widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); - widenScalarDst(MI, WideTy); + + if (TypeIdx == 0) { + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + } else { + assert(TypeIdx == 1); + // The "number of bits to shift" operand must preserve its value as an + // unsigned integer: + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + } + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_SDIV: case TargetOpcode::G_SREM: + case TargetOpcode::G_SMIN: + case TargetOpcode::G_SMAX: Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); @@ -759,18 +1262,28 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); - // The "number of bits to shift" operand must preserve its value as an - // unsigned integer: - widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); - widenScalarDst(MI, WideTy); + + if (TypeIdx == 0) { + unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
+ TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; + + widenScalarSrc(MI, WideTy, 1, CvtOp); + widenScalarDst(MI, WideTy); + } else { + assert(TypeIdx == 1); + // The "number of bits to shift" operand must preserve its value as an + // unsigned integer: + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + } + Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_UDIV: case TargetOpcode::G_UREM: - case TargetOpcode::G_LSHR: + case TargetOpcode::G_UMIN: + case TargetOpcode::G_UMAX: Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); @@ -788,8 +1301,9 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT); widenScalarDst(MI, WideTy); } else { + bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector(); // Explicit extension is required here since high bits affect the result. - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false)); } Observer.changedInstr(MI); return Legalized; @@ -819,23 +1333,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_INSERT: - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); - widenScalarDst(MI, WideTy); - Observer.changedInstr(MI); - return Legalized; - case TargetOpcode::G_LOAD: - // For some types like i24, we might try to widen to i32. To properly handle - // this we should be using a dedicated extending load, until then avoid - // trying to legalize. - if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) != - WideTy.getSizeInBits()) - return UnableToLegalize; - LLVM_FALLTHROUGH; case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: Observer.changingInstr(MI); @@ -844,12 +1342,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; case TargetOpcode::G_STORE: { - if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) || - WideTy != LLT::scalar(8)) + if (TypeIdx != 0) + return UnableToLegalize; + + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (!isPowerOf2_32(Ty.getSizeInBits())) return UnableToLegalize; Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT); + + unsigned ExtType = Ty.getScalarSizeInBits() == 1 ? 
+ TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT; + widenScalarSrc(MI, WideTy, 0, ExtType); + Observer.changedInstr(MI); return Legalized; } @@ -871,14 +1376,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { bool LosesInfo; switch (WideTy.getSizeInBits()) { case 32: - Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo); + Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, + &LosesInfo); break; case 64: - Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo); + Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, + &LosesInfo); break; default: - llvm_unreachable("Unhandled fp widen type"); + return UnableToLegalize; } + + assert(!LosesInfo && "extend should always be lossless"); + Observer.changingInstr(MI); SrcMO.setFPImm(ConstantFP::get(Ctx, Val)); @@ -894,7 +1404,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { } case TargetOpcode::G_BRCOND: Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false)); Observer.changedInstr(MI); return Legalized; @@ -947,23 +1457,103 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_EXTRACT_VECTOR_ELT: + case TargetOpcode::G_EXTRACT_VECTOR_ELT: { + if (TypeIdx == 0) { + Register VecReg = MI.getOperand(1).getReg(); + LLT VecTy = MRI.getType(VecReg); + Observer.changingInstr(MI); + + widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(), + WideTy.getSizeInBits()), + 1, TargetOpcode::G_SEXT); + + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + if (TypeIdx != 2) return UnableToLegalize; Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); Observer.changedInstr(MI); return Legalized; - + } + case TargetOpcode::G_FADD: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMA: + case TargetOpcode::G_FNEG: + case TargetOpcode::G_FABS: + case TargetOpcode::G_FCANONICALIZE: + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINIMUM: + case TargetOpcode::G_FMAXIMUM: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FREM: case TargetOpcode::G_FCEIL: + case TargetOpcode::G_FFLOOR: + case TargetOpcode::G_FCOS: + case TargetOpcode::G_FSIN: + case TargetOpcode::G_FLOG10: + case TargetOpcode::G_FLOG: + case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FRINT: + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_FSQRT: + case TargetOpcode::G_FEXP: + case TargetOpcode::G_FEXP2: + case TargetOpcode::G_FPOW: + case TargetOpcode::G_INTRINSIC_TRUNC: + case TargetOpcode::G_INTRINSIC_ROUND: + assert(TypeIdx == 0); + Observer.changingInstr(MI); + + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) + widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT); + + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_INTTOPTR: + if (TypeIdx != 1) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_PTRTOINT: if (TypeIdx != 0) return UnableToLegalize; + Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); - 
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_BUILD_VECTOR: { + Observer.changingInstr(MI); + + const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType(); + for (int I = 1, E = MI.getNumOperands(); I != E; ++I) + widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT); + + // Avoid changing the result vector type if the source element type was + // requested. + if (TypeIdx == 1) { + auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); + MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC)); + } else { + widenScalarDst(MI, WideTy, 0); + } + Observer.changedInstr(MI); return Legalized; } + } } LegalizerHelper::LegalizeResult @@ -976,13 +1566,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; case TargetOpcode::G_SREM: case TargetOpcode::G_UREM: { - unsigned QuotReg = MRI.createGenericVirtualRegister(Ty); + Register QuotReg = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV) .addDef(QuotReg) .addUse(MI.getOperand(1).getReg()) .addUse(MI.getOperand(2).getReg()); - unsigned ProdReg = MRI.createGenericVirtualRegister(Ty); + Register ProdReg = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg()); MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), ProdReg); @@ -993,10 +1583,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case TargetOpcode::G_UMULO: { // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the // result. - unsigned Res = MI.getOperand(0).getReg(); - unsigned Overflow = MI.getOperand(1).getReg(); - unsigned LHS = MI.getOperand(2).getReg(); - unsigned RHS = MI.getOperand(3).getReg(); + Register Res = MI.getOperand(0).getReg(); + Register Overflow = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); MIRBuilder.buildMul(Res, LHS, RHS); @@ -1004,20 +1594,20 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { ? TargetOpcode::G_SMULH : TargetOpcode::G_UMULH; - unsigned HiPart = MRI.createGenericVirtualRegister(Ty); + Register HiPart = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildInstr(Opcode) .addDef(HiPart) .addUse(LHS) .addUse(RHS); - unsigned Zero = MRI.createGenericVirtualRegister(Ty); + Register Zero = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildConstant(Zero, 0); // For *signed* multiply, overflow is detected by checking: // (hi != (lo >> bitwidth-1)) if (Opcode == TargetOpcode::G_SMULH) { - unsigned Shifted = MRI.createGenericVirtualRegister(Ty); - unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); + Register Shifted = MRI.createGenericVirtualRegister(Ty); + Register ShiftAmt = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1); MIRBuilder.buildInstr(TargetOpcode::G_ASHR) .addDef(Shifted) @@ -1035,7 +1625,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { // represent them. 
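// Two standalone checks of lowerings in this area, assuming 32-bit scalars
// and illustrative helper names. The G_SREM lowering above computes
// rem = x - (x / y) * y, and the widened G_BSWAP earlier swaps at the wide
// width and shifts down by the width difference (__builtin_bswap32 stands in
// for the wide G_BSWAP):
#include <cstdint>

static int32_t lowerSRem(int32_t X, int32_t Y) {
  int32_t Quot = X / Y;    // G_SDIV
  int32_t Prod = Quot * Y; // G_MUL
  return X - Prod;         // G_SUB; equals X % Y for nonzero Y
}

static uint16_t widenBSwap16(uint16_t X) {
  uint32_t Wide = __builtin_bswap32(X); // G_BSWAP at WideTy
  return uint16_t(Wide >> 16);          // G_LSHR by DiffBits, then G_TRUNC
}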
if (Ty.isVector()) return UnableToLegalize; - unsigned Res = MI.getOperand(0).getReg(); + Register Res = MI.getOperand(0).getReg(); Type *ZeroTy; LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); switch (Ty.getSizeInBits()) { @@ -1057,10 +1647,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { ConstantFP &ZeroForNegation = *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy)); auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation); - MIRBuilder.buildInstr(TargetOpcode::G_FSUB) - .addDef(Res) - .addUse(Zero->getOperand(0).getReg()) - .addUse(MI.getOperand(1).getReg()); + Register SubByReg = MI.getOperand(1).getReg(); + Register ZeroReg = Zero->getOperand(0).getReg(); + MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg}, + MI.getFlags()); MI.eraseFromParent(); return Legalized; } @@ -1070,24 +1660,21 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. if (LI.getAction({G_FNEG, {Ty}}).Action == Lower) return UnableToLegalize; - unsigned Res = MI.getOperand(0).getReg(); - unsigned LHS = MI.getOperand(1).getReg(); - unsigned RHS = MI.getOperand(2).getReg(); - unsigned Neg = MRI.createGenericVirtualRegister(Ty); + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + Register Neg = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS); - MIRBuilder.buildInstr(TargetOpcode::G_FADD) - .addDef(Res) - .addUse(LHS) - .addUse(Neg); + MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags()); MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { - unsigned OldValRes = MI.getOperand(0).getReg(); - unsigned SuccessRes = MI.getOperand(1).getReg(); - unsigned Addr = MI.getOperand(2).getReg(); - unsigned CmpVal = MI.getOperand(3).getReg(); - unsigned NewVal = MI.getOperand(4).getReg(); + Register OldValRes = MI.getOperand(0).getReg(); + Register SuccessRes = MI.getOperand(1).getReg(); + Register Addr = MI.getOperand(2).getReg(); + Register CmpVal = MI.getOperand(3).getReg(); + Register NewVal = MI.getOperand(4).getReg(); MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, **MI.memoperands_begin()); MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal); @@ -1098,8 +1685,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: { // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned PtrReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(DstReg); auto &MMO = **MI.memoperands_begin(); @@ -1114,8 +1701,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { } if (DstTy.isScalar()) { - unsigned TmpReg = MRI.createGenericVirtualRegister( - LLT::scalar(MMO.getSize() /* in bytes */ * 8)); + Register TmpReg = + MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); switch (MI.getOpcode()) { default: @@ -1142,15 +1729,27 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTPOP: return lowerBitCount(MI, TypeIdx, Ty); + case G_UADDO: { + Register Res = 
MI.getOperand(0).getReg(); + Register CarryOut = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + + MIRBuilder.buildAdd(Res, LHS, RHS); + MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS); + + MI.eraseFromParent(); + return Legalized; + } case G_UADDE: { - unsigned Res = MI.getOperand(0).getReg(); - unsigned CarryOut = MI.getOperand(1).getReg(); - unsigned LHS = MI.getOperand(2).getReg(); - unsigned RHS = MI.getOperand(3).getReg(); - unsigned CarryIn = MI.getOperand(4).getReg(); + Register Res = MI.getOperand(0).getReg(); + Register CarryOut = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + Register CarryIn = MI.getOperand(4).getReg(); - unsigned TmpRes = MRI.createGenericVirtualRegister(Ty); - unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty); + Register TmpRes = MRI.createGenericVirtualRegister(Ty); + Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildAdd(TmpRes, LHS, RHS); MIRBuilder.buildZExt(ZExtCarryIn, CarryIn); @@ -1160,113 +1759,1325 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case G_USUBO: { + Register Res = MI.getOperand(0).getReg(); + Register BorrowOut = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + + MIRBuilder.buildSub(Res, LHS, RHS); + MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS); + + MI.eraseFromParent(); + return Legalized; + } + case G_USUBE: { + Register Res = MI.getOperand(0).getReg(); + Register BorrowOut = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + Register BorrowIn = MI.getOperand(4).getReg(); + + Register TmpRes = MRI.createGenericVirtualRegister(Ty); + Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty); + Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1)); + Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1)); + + MIRBuilder.buildSub(TmpRes, LHS, RHS); + MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn); + MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn); + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS); + MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS); + MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS); + + MI.eraseFromParent(); + return Legalized; + } + case G_UITOFP: + return lowerUITOFP(MI, TypeIdx, Ty); + case G_SITOFP: + return lowerSITOFP(MI, TypeIdx, Ty); + case G_SMIN: + case G_SMAX: + case G_UMIN: + case G_UMAX: + return lowerMinMax(MI, TypeIdx, Ty); + case G_FCOPYSIGN: + return lowerFCopySign(MI, TypeIdx, Ty); + case G_FMINNUM: + case G_FMAXNUM: + return lowerFMinNumMaxNum(MI); } } +LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( + MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { + SmallVector<Register, 2> DstRegs; + + unsigned NarrowSize = NarrowTy.getSizeInBits(); + Register DstReg = MI.getOperand(0).getReg(); + unsigned Size = MRI.getType(DstReg).getSizeInBits(); + int NumParts = Size / NarrowSize; + // FIXME: Don't know how to handle the situation where the small vectors + // aren't all the same size yet. 
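// Standalone checks of the carry/borrow lowerings above, assuming 32-bit
// scalars and illustrative helper names: G_UADDO detects the carry by
// comparing the wrapped sum against an operand, and G_USUBE selects the
// borrow-out depending on whether the operands were equal.
#include <cstdint>

static uint32_t lowerUAddO(uint32_t L, uint32_t R, bool &CarryOut) {
  uint32_t Res = L + R; // G_ADD, wraps on overflow
  CarryOut = Res < R;   // G_ICMP ult: the sum is smaller iff a carry occurred
  return Res;
}

static uint32_t lowerUSubE(uint32_t L, uint32_t R, bool BorrowIn,
                           bool &BorrowOut) {
  uint32_t Res = L - R - uint32_t(BorrowIn); // the two G_SUBs
  BorrowOut = (L == R) ? BorrowIn : (L < R); // the G_SELECT of the borrows
  return Res;
}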
+ if (Size % NarrowSize != 0) + return UnableToLegalize; + + for (int i = 0; i < NumParts; ++i) { + Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildUndef(TmpReg); + DstRegs.push_back(TmpReg); + } + + if (NarrowTy.isVector()) + MIRBuilder.buildConcatVectors(DstReg, DstRegs); + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - // FIXME: Don't know how to handle secondary types yet. +LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + const unsigned Opc = MI.getOpcode(); + const unsigned NumOps = MI.getNumOperands() - 1; + const unsigned NarrowSize = NarrowTy.getSizeInBits(); + const Register DstReg = MI.getOperand(0).getReg(); + const unsigned Flags = MI.getFlags(); + const LLT DstTy = MRI.getType(DstReg); + const unsigned Size = DstTy.getSizeInBits(); + const int NumParts = Size / NarrowSize; + const LLT EltTy = DstTy.getElementType(); + const unsigned EltSize = EltTy.getSizeInBits(); + const unsigned BitsForNumParts = NarrowSize * NumParts; + + // Check if we have any leftovers. If we do, then only handle the case where + // the leftover is one element. + if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size) + return UnableToLegalize; + + if (BitsForNumParts != Size) { + Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy); + MIRBuilder.buildUndef(AccumDstReg); + + // Handle the pieces which evenly divide into the requested type with + // extract/op/insert sequence. + for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) { + SmallVector<SrcOp, 4> SrcOps; + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { + Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset); + SrcOps.push_back(PartOpReg); + } + + Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); + + Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy); + MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset); + AccumDstReg = PartInsertReg; + } + + // Handle the remaining element sized leftover piece. 
+ SmallVector<SrcOp, 4> SrcOps; + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { + Register PartOpReg = MRI.createGenericVirtualRegister(EltTy); + MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), + BitsForNumParts); + SrcOps.push_back(PartOpReg); + } + + Register PartDstReg = MRI.createGenericVirtualRegister(EltTy); + MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); + MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts); + MI.eraseFromParent(); + + return Legalized; + } + + SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs; + + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs); + + if (NumOps >= 2) + extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs); + + if (NumOps >= 3) + extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs); + + for (int i = 0; i < NumParts; ++i) { + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); + + if (NumOps == 1) + MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags); + else if (NumOps == 2) { + MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags); + } else if (NumOps == 3) { + MIRBuilder.buildInstr(Opc, {DstReg}, + {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags); + } + + DstRegs.push_back(DstReg); + } + + if (NarrowTy.isVector()) + MIRBuilder.buildConcatVectors(DstReg, DstRegs); + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + +// Handle splitting vector operations which need to have the same number of +// elements in each type index, but each type index may have a different element +// type. +// +// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> -> +// <2 x s64> = G_SHL <2 x s64>, <2 x s32> +// <2 x s64> = G_SHL <2 x s64>, <2 x s32> +// +// Also handles some irregular breakdown cases, e.g. +// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> -> +// <2 x s64> = G_SHL <2 x s64>, <2 x s32> +// s64 = G_SHL s64, s32 +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorMultiEltType( + MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) { if (TypeIdx != 0) return UnableToLegalize; - MIRBuilder.setInstr(MI); - switch (MI.getOpcode()) { - default: + const LLT NarrowTy0 = NarrowTyArg; + const unsigned NewNumElts = + NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1; + + const Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT LeftoverTy0; + + // All of the operands need to have the same number of elements, so if we can + // determine a type breakdown for the result type, we can for all of the + // source types. + int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first; + if (NumParts < 0) return UnableToLegalize; - case TargetOpcode::G_IMPLICIT_DEF: { - SmallVector<unsigned, 2> DstRegs; - unsigned NarrowSize = NarrowTy.getSizeInBits(); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned Size = MRI.getType(DstReg).getSizeInBits(); - int NumParts = Size / NarrowSize; - // FIXME: Don't know how to handle the situation where the small vectors - // aren't all the same size yet. 
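// A standalone sketch of the even-breakdown path of fewerElementsVectorBasic
// above, assuming a 4-element float add done as two 2-element pieces
// (fewerElementsFAdd4 is an illustrative name):
static void fewerElementsFAdd4(const float A[4], const float B[4],
                               float Out[4]) {
  for (unsigned Part = 0; Part != 2; ++Part) // one G_FADD per NarrowTy piece
    for (unsigned I = 0; I != 2; ++I)        // ... covering two elements each
      Out[Part * 2 + I] = A[Part * 2 + I] + B[Part * 2 + I];
  // Writing both halves of Out models the final G_CONCAT_VECTORS.
}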
- if (Size % NarrowSize != 0) + SmallVector<MachineInstrBuilder, 4> NewInsts; + + SmallVector<Register, 4> DstRegs, LeftoverDstRegs; + SmallVector<Register, 4> PartRegs, LeftoverRegs; + + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { + LLT LeftoverTy; + Register SrcReg = MI.getOperand(I).getReg(); + LLT SrcTyI = MRI.getType(SrcReg); + LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType()); + LLT LeftoverTyI; + + // Split this operand into the requested typed registers, and any leftover + // required to reproduce the original type. + if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs, + LeftoverRegs)) return UnableToLegalize; - for (int i = 0; i < NumParts; ++i) { - unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildUndef(TmpReg); - DstRegs.push_back(TmpReg); + if (I == 1) { + // For the first operand, create an instruction for each part and setup + // the result. + for (Register PartReg : PartRegs) { + Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0); + NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) + .addDef(PartDstReg) + .addUse(PartReg)); + DstRegs.push_back(PartDstReg); + } + + for (Register LeftoverReg : LeftoverRegs) { + Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0); + NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) + .addDef(PartDstReg) + .addUse(LeftoverReg)); + LeftoverDstRegs.push_back(PartDstReg); + } + } else { + assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size()); + + // Add the newly created operand splits to the existing instructions. The + // odd-sized pieces are ordered after the requested NarrowTyArg sized + // pieces. + unsigned InstCount = 0; + for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J) + NewInsts[InstCount++].addUse(PartRegs[J]); + for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J) + NewInsts[InstCount++].addUse(LeftoverRegs[J]); } - if (NarrowTy.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); + PartRegs.clear(); + LeftoverRegs.clear(); + } - MI.eraseFromParent(); - return Legalized; + // Insert the newly built operations and rebuild the result register. + for (auto &MIB : NewInsts) + MIRBuilder.insertInstr(MIB); + + insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + + LLT NarrowTy0 = NarrowTy; + LLT NarrowTy1; + unsigned NumParts; + + if (NarrowTy.isVector()) { + // Uneven breakdown not handled. 
+ NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); + if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) + return UnableToLegalize; + + NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits()); + } else { + NumParts = DstTy.getNumElements(); + NarrowTy1 = SrcTy.getElementType(); } - case TargetOpcode::G_ADD: { - unsigned NarrowSize = NarrowTy.getSizeInBits(); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned Size = MRI.getType(DstReg).getSizeInBits(); - int NumParts = Size / NarrowSize; + + SmallVector<Register, 4> SrcRegs, DstRegs; + extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs); + + for (unsigned I = 0; I < NumParts; ++I) { + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); + MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode()) + .addDef(DstReg) + .addUse(SrcRegs[I]); + + NewInst->setFlags(MI.getFlags()); + DstRegs.push_back(DstReg); + } + + if (NarrowTy.isVector()) + MIRBuilder.buildConcatVectors(DstReg, DstRegs); + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + Register DstReg = MI.getOperand(0).getReg(); + Register Src0Reg = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(Src0Reg); + + unsigned NumParts; + LLT NarrowTy0, NarrowTy1; + + if (TypeIdx == 0) { + unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; + unsigned OldElts = DstTy.getNumElements(); + + NarrowTy0 = NarrowTy; + NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements(); + NarrowTy1 = NarrowTy.isVector() ? + LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) : + SrcTy.getElementType(); + + } else { + unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; + unsigned OldElts = SrcTy.getNumElements(); + + NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : + NarrowTy.getNumElements(); + NarrowTy0 = LLT::vector(NarrowTy.getNumElements(), + DstTy.getScalarSizeInBits()); + NarrowTy1 = NarrowTy; + } + + // FIXME: Don't know how to handle the situation where the small vectors + // aren't all the same size yet. 
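// A standalone model of the part-wise compare fewerElementsVectorCmp builds
// just below, assuming a 4-element signed compare split into two 2-element
// pieces (the helper name is illustrative):
#include <cstdint>

static void fewerElementsICmpSlt4(const int32_t A[4], const int32_t B[4],
                                  bool Out[4]) {
  for (unsigned Part = 0; Part != 2; ++Part) // one G_ICMP per NarrowTy piece
    for (unsigned I = 0; I != 2; ++I)
      Out[Part * 2 + I] = A[Part * 2 + I] < B[Part * 2 + I];
}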
+ if (NarrowTy1.isVector() && + NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements()) + return UnableToLegalize; + + CmpInst::Predicate Pred + = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + + SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; + extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs); + extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs); + + for (unsigned I = 0; I < NumParts; ++I) { + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); + DstRegs.push_back(DstReg); + + if (MI.getOpcode() == TargetOpcode::G_ICMP) + MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); + else { + MachineInstr *NewCmp + = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); + NewCmp->setFlags(MI.getFlags()); + } + } + + if (NarrowTy1.isVector()) + MIRBuilder.buildConcatVectors(DstReg, DstRegs); + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + Register DstReg = MI.getOperand(0).getReg(); + Register CondReg = MI.getOperand(1).getReg(); + + unsigned NumParts = 0; + LLT NarrowTy0, NarrowTy1; + + LLT DstTy = MRI.getType(DstReg); + LLT CondTy = MRI.getType(CondReg); + unsigned Size = DstTy.getSizeInBits(); + + assert(TypeIdx == 0 || CondTy.isVector()); + + if (TypeIdx == 0) { + NarrowTy0 = NarrowTy; + NarrowTy1 = CondTy; + + unsigned NarrowSize = NarrowTy0.getSizeInBits(); // FIXME: Don't know how to handle the situation where the small vectors // aren't all the same size yet. if (Size % NarrowSize != 0) return UnableToLegalize; - SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; - extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); - extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); + NumParts = Size / NarrowSize; - for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]); - DstRegs.push_back(DstReg); + // Need to break down the condition type + if (CondTy.isVector()) { + if (CondTy.getNumElements() == NumParts) + NarrowTy1 = CondTy.getElementType(); + else + NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts, + CondTy.getScalarSizeInBits()); + } + } else { + NumParts = CondTy.getNumElements(); + if (NarrowTy.isVector()) { + // TODO: Handle uneven breakdown. + if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements()) + return UnableToLegalize; + + return UnableToLegalize; + } else { + NarrowTy0 = DstTy.getElementType(); + NarrowTy1 = NarrowTy; } + } + + SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs; + if (CondTy.isVector()) + extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs); + + extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs); + extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs); + for (unsigned i = 0; i < NumParts; ++i) { + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); + MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? 
Src0Regs[i] : CondReg, + Src1Regs[i], Src2Regs[i]); + DstRegs.push_back(DstReg); + } + + if (NarrowTy0.isVector()) MIRBuilder.buildConcatVectors(DstReg, DstRegs); - MI.eraseFromParent(); - return Legalized; + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + const Register DstReg = MI.getOperand(0).getReg(); + LLT PhiTy = MRI.getType(DstReg); + LLT LeftoverTy; + + // All of the operands need to have the same number of elements, so if we can + // determine a type breakdown for the result type, we can for all of the + // source types. + int NumParts, NumLeftover; + std::tie(NumParts, NumLeftover) + = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy); + if (NumParts < 0) + return UnableToLegalize; + + SmallVector<Register, 4> DstRegs, LeftoverDstRegs; + SmallVector<MachineInstrBuilder, 4> NewInsts; + + const int TotalNumParts = NumParts + NumLeftover; + + // Insert the new phis in the result block first. + for (int I = 0; I != TotalNumParts; ++I) { + LLT Ty = I < NumParts ? NarrowTy : LeftoverTy; + Register PartDstReg = MRI.createGenericVirtualRegister(Ty); + NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI) + .addDef(PartDstReg)); + if (I < NumParts) + DstRegs.push_back(PartDstReg); + else + LeftoverDstRegs.push_back(PartDstReg); } - case TargetOpcode::G_LOAD: - case TargetOpcode::G_STORE: { - bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; - unsigned ValReg = MI.getOperand(0).getReg(); - unsigned AddrReg = MI.getOperand(1).getReg(); - unsigned NarrowSize = NarrowTy.getSizeInBits(); - unsigned Size = MRI.getType(ValReg).getSizeInBits(); - unsigned NumParts = Size / NarrowSize; - - SmallVector<unsigned, 8> NarrowRegs; - if (!IsLoad) - extractParts(ValReg, NarrowTy, NumParts, NarrowRegs); - - const LLT OffsetTy = - LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits()); - MachineFunction &MF = *MI.getMF(); - MachineMemOperand *MMO = *MI.memoperands_begin(); - for (unsigned Idx = 0; Idx < NumParts; ++Idx) { - unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8; - unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment); - unsigned NewAddrReg = 0; - MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment); - MachineMemOperand &NewMMO = *MF.getMachineMemOperand( - MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(), - NarrowTy.getSizeInBits() / 8, Alignment); + + MachineBasicBlock *MBB = MI.getParent(); + MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI()); + insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs); + + SmallVector<Register, 4> PartRegs, LeftoverRegs; + + // Insert code to extract the incoming values in each predecessor block. + for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { + PartRegs.clear(); + LeftoverRegs.clear(); + + Register SrcReg = MI.getOperand(I).getReg(); + MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + + LLT Unused; + if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs, + LeftoverRegs)) + return UnableToLegalize; + + // Add the newly created operand splits to the existing instructions. The + // odd-sized pieces are ordered after the requested NarrowTyArg sized + // pieces. + for (int J = 0; J != TotalNumParts; ++J) { + MachineInstrBuilder MIB = NewInsts[J]; + MIB.addUse(J < NumParts ? 
PartRegs[J] : LeftoverRegs[J - NumParts]);
+      MIB.addMBB(&OpMBB);
+    }
+  }
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
+                                      LLT NarrowTy) {
+  // FIXME: Don't know how to handle secondary types yet.
+  if (TypeIdx != 0)
+    return UnableToLegalize;
+
+  MachineMemOperand *MMO = *MI.memoperands_begin();
+
+  // This implementation doesn't work for atomics. Give up instead of doing
+  // something invalid.
+  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
+      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
+    return UnableToLegalize;
+
+  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
+  Register ValReg = MI.getOperand(0).getReg();
+  Register AddrReg = MI.getOperand(1).getReg();
+  LLT ValTy = MRI.getType(ValReg);
+
+  int NumParts = -1;
+  int NumLeftover = -1;
+  LLT LeftoverTy;
+  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
+  if (IsLoad) {
+    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
+  } else {
+    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
+                     NarrowLeftoverRegs)) {
+      NumParts = NarrowRegs.size();
+      NumLeftover = NarrowLeftoverRegs.size();
+    }
+  }
+
+  if (NumParts == -1)
+    return UnableToLegalize;
+
+  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
+
+  unsigned TotalSize = ValTy.getSizeInBits();
+
+  // Split the load/store into PartTy sized pieces starting at Offset. If this
+  // is a load, return the new registers in ValRegs. For a store, each element
+  // of ValRegs should be PartTy. Returns the next offset that needs to be
+  // handled.
+  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
+                             unsigned Offset) -> unsigned {
+    MachineFunction &MF = MIRBuilder.getMF();
+    unsigned PartSize = PartTy.getSizeInBits();
+    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
+         Offset += PartSize, ++Idx) {
+      unsigned ByteSize = PartSize / 8;
+      unsigned ByteOffset = Offset / 8;
+      Register NewAddrReg;
+
+      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
+
+      MachineMemOperand *NewMMO =
+          MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
+
       if (IsLoad) {
-        unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
-        NarrowRegs.push_back(Dst);
-        MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
+        Register Dst = MRI.createGenericVirtualRegister(PartTy);
+        ValRegs.push_back(Dst);
+        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
       } else {
-        MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
+        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
       }
     }
-    if (IsLoad) {
-      if (NarrowTy.isVector())
-        MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
-      else
-        MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
-    }
+
+    return Offset;
+  };
+
+  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
+
+  // Handle the rest of the register if this isn't an even type breakdown.
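+  // For instance (illustrative numbers, assuming getNarrowTypeBreakDown picks
+  // the widest remainder type): an s96 access narrowed with NarrowTy = s64
+  // yields one s64 piece at byte offset 0, HandledOffset == 64, and a second
+  // s32 leftover piece at byte offset 64 / 8 == 8, emitted just below.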
+ if (LeftoverTy.isValid()) + splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset); + + if (IsLoad) { + insertParts(ValReg, ValTy, NarrowTy, NarrowRegs, + LeftoverTy, NarrowLeftoverRegs); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + using namespace TargetOpcode; + + MIRBuilder.setInstr(MI); + switch (MI.getOpcode()) { + case G_IMPLICIT_DEF: + return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); + case G_AND: + case G_OR: + case G_XOR: + case G_ADD: + case G_SUB: + case G_MUL: + case G_SMULH: + case G_UMULH: + case G_FADD: + case G_FMUL: + case G_FSUB: + case G_FNEG: + case G_FABS: + case G_FCANONICALIZE: + case G_FDIV: + case G_FREM: + case G_FMA: + case G_FPOW: + case G_FEXP: + case G_FEXP2: + case G_FLOG: + case G_FLOG2: + case G_FLOG10: + case G_FNEARBYINT: + case G_FCEIL: + case G_FFLOOR: + case G_FRINT: + case G_INTRINSIC_ROUND: + case G_INTRINSIC_TRUNC: + case G_FCOS: + case G_FSIN: + case G_FSQRT: + case G_BSWAP: + case G_SDIV: + case G_SMIN: + case G_SMAX: + case G_UMIN: + case G_UMAX: + case G_FMINNUM: + case G_FMAXNUM: + case G_FMINNUM_IEEE: + case G_FMAXNUM_IEEE: + case G_FMINIMUM: + case G_FMAXIMUM: + return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); + case G_SHL: + case G_LSHR: + case G_ASHR: + case G_CTLZ: + case G_CTLZ_ZERO_UNDEF: + case G_CTTZ: + case G_CTTZ_ZERO_UNDEF: + case G_CTPOP: + case G_FCOPYSIGN: + return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy); + case G_ZEXT: + case G_SEXT: + case G_ANYEXT: + case G_FPEXT: + case G_FPTRUNC: + case G_SITOFP: + case G_UITOFP: + case G_FPTOSI: + case G_FPTOUI: + case G_INTTOPTR: + case G_PTRTOINT: + case G_ADDRSPACE_CAST: + return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy); + case G_ICMP: + case G_FCMP: + return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy); + case G_SELECT: + return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); + case G_PHI: + return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); + case G_LOAD: + case G_STORE: + return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); + default: + return UnableToLegalize; + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, + const LLT HalfTy, const LLT AmtTy) { + + Register InL = MRI.createGenericVirtualRegister(HalfTy); + Register InH = MRI.createGenericVirtualRegister(HalfTy); + MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg()); + + if (Amt.isNullValue()) { + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH}); MI.eraseFromParent(); return Legalized; } + + LLT NVT = HalfTy; + unsigned NVTBits = HalfTy.getSizeInBits(); + unsigned VTBits = 2 * NVTBits; + + SrcOp Lo(Register(0)), Hi(Register(0)); + if (MI.getOpcode() == TargetOpcode::G_SHL) { + if (Amt.ugt(VTBits)) { + Lo = Hi = MIRBuilder.buildConstant(NVT, 0); + } else if (Amt.ugt(NVTBits)) { + Lo = MIRBuilder.buildConstant(NVT, 0); + Hi = MIRBuilder.buildShl(NVT, InL, + MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); + } else if (Amt == NVTBits) { + Lo = MIRBuilder.buildConstant(NVT, 0); + Hi = InL; + } else { + Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt)); + auto OrLHS = + MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt)); + auto OrRHS = MIRBuilder.buildLShr( + NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); + Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); + } + } else if (MI.getOpcode() == 
TargetOpcode::G_LSHR) { + if (Amt.ugt(VTBits)) { + Lo = Hi = MIRBuilder.buildConstant(NVT, 0); + } else if (Amt.ugt(NVTBits)) { + Lo = MIRBuilder.buildLShr(NVT, InH, + MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); + Hi = MIRBuilder.buildConstant(NVT, 0); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = MIRBuilder.buildConstant(NVT, 0); + } else { + auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt); + + auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst); + auto OrRHS = MIRBuilder.buildShl( + NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); + + Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); + Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst); + } + } else { + if (Amt.ugt(VTBits)) { + Hi = Lo = MIRBuilder.buildAShr( + NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); + } else if (Amt.ugt(NVTBits)) { + Lo = MIRBuilder.buildAShr(NVT, InH, + MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); + Hi = MIRBuilder.buildAShr(NVT, InH, + MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = MIRBuilder.buildAShr(NVT, InH, + MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); + } else { + auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt); + + auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst); + auto OrRHS = MIRBuilder.buildShl( + NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); + + Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); + Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst); + } } + + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()}); + MI.eraseFromParent(); + + return Legalized; +} + +// TODO: Optimize if constant shift amount. +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, + LLT RequestedTy) { + if (TypeIdx == 1) { + Observer.changingInstr(MI); + narrowScalarSrc(MI, RequestedTy, 2); + Observer.changedInstr(MI); + return Legalized; + } + + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (DstTy.isVector()) + return UnableToLegalize; + + Register Amt = MI.getOperand(2).getReg(); + LLT ShiftAmtTy = MRI.getType(Amt); + const unsigned DstEltSize = DstTy.getScalarSizeInBits(); + if (DstEltSize % 2 != 0) + return UnableToLegalize; + + // Ignore the input type. We can only go to exactly half the size of the + // input. If that isn't small enough, the resulting pieces will be further + // legalized. + const unsigned NewBitSize = DstEltSize / 2; + const LLT HalfTy = LLT::scalar(NewBitSize); + const LLT CondTy = LLT::scalar(1); + + if (const MachineInstr *KShiftAmt = + getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) { + return narrowScalarShiftByConstant( + MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy); + } + + // TODO: Expand with known bits. + + // Handle the fully general expansion by an unknown amount. 
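+  // The select-based expansion below uses the usual double-word shift
+  // identities; sketched in C for G_SHL with W == NewBitSize (the Amt == 0
+  // case for the high half is folded back in with the IsZero select):
+  //   if (Amt < W) {                               // IsShort
+  //     Lo = InL << Amt;
+  //     Hi = (InH << Amt) | (InL >> (W - Amt));    // W - Amt == AmtLack
+  //   } else {
+  //     Lo = 0;
+  //     Hi = InL << (Amt - W);                     // Amt - W == AmtExcess
+  //   }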
+  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
+
+  Register InL = MRI.createGenericVirtualRegister(HalfTy);
+  Register InH = MRI.createGenericVirtualRegister(HalfTy);
+  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
+
+  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
+  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
+
+  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
+  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
+  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
+
+  Register ResultRegs[2];
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_SHL: {
+    // Short: ShAmt < NewBitSize
+    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
+
+    auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
+    auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
+    auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+    // Long: ShAmt >= NewBitSize
+    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
+    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
+
+    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
+    auto Hi = MIRBuilder.buildSelect(
+        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
+
+    ResultRegs[0] = Lo.getReg(0);
+    ResultRegs[1] = Hi.getReg(0);
+    break;
+  }
+  case TargetOpcode::G_LSHR: {
+    // Short: ShAmt < NewBitSize
+    auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
+
+    auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+    auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+    auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+    // Long: ShAmt >= NewBitSize
+    auto HiL = MIRBuilder.buildConstant(HalfTy, 0);          // Hi part is zero.
+    auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
+
+    auto Lo = MIRBuilder.buildSelect(
+        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
+    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
+
+    ResultRegs[0] = Lo.getReg(0);
+    ResultRegs[1] = Hi.getReg(0);
+    break;
+  }
+  case TargetOpcode::G_ASHR: {
+    // Short: ShAmt < NewBitSize
+    auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
+
+    auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+    auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+    auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+    // Long: ShAmt >= NewBitSize
+
+    // Sign of Hi part.
+    auto HiL = MIRBuilder.buildAShr(
+        HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
+
+    auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
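+    // (For Amt >= W the arithmetic result is InH >> (Amt - W) with the sign
+    // replicated into the high half, hence the arithmetic shifts for both
+    // LoL and HiL here.)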
+ + auto Lo = MIRBuilder.buildSelect( + HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); + + auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); + + ResultRegs[0] = Lo.getReg(0); + ResultRegs[1] = Hi.getReg(0); + break; + } + default: + llvm_unreachable("not a shift"); + } + + MIRBuilder.buildMerge(DstReg, ResultRegs); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, + LLT MoreTy) { + assert(TypeIdx == 0 && "Expecting only Idx 0"); + + Observer.changingInstr(MI); + for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { + MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + moreElementsVectorSrc(MI, MoreTy, I); + } + + MachineBasicBlock &MBB = *MI.getParent(); + MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, + LLT MoreTy) { + MIRBuilder.setInstr(MI); + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case TargetOpcode::G_IMPLICIT_DEF: { + Observer.changingInstr(MI); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: + case TargetOpcode::G_SMIN: + case TargetOpcode::G_SMAX: + case TargetOpcode::G_UMIN: + case TargetOpcode::G_UMAX: { + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorSrc(MI, MoreTy, 2); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_EXTRACT: + if (TypeIdx != 1) + return UnableToLegalize; + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_INSERT: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_SELECT: + if (TypeIdx != 0) + return UnableToLegalize; + if (MRI.getType(MI.getOperand(1).getReg()).isVector()) + return UnableToLegalize; + + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 2); + moreElementsVectorSrc(MI, MoreTy, 3); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_PHI: + return moreElementsVectorPhi(MI, TypeIdx, MoreTy); + default: + return UnableToLegalize; + } +} + +void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, + ArrayRef<Register> Src1Regs, + ArrayRef<Register> Src2Regs, + LLT NarrowTy) { + MachineIRBuilder &B = MIRBuilder; + unsigned SrcParts = Src1Regs.size(); + unsigned DstParts = DstRegs.size(); + + unsigned DstIdx = 0; // Low bits of the result. + Register FactorSum = + B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0); + DstRegs[DstIdx] = FactorSum; + + unsigned CarrySumPrevDstIdx; + SmallVector<Register, 4> Factors; + + for (DstIdx = 1; DstIdx < DstParts; DstIdx++) { + // Collect low parts of muls for DstIdx. + for (unsigned i = DstIdx + 1 < SrcParts ? 
0 : DstIdx - SrcParts + 1;
+         i <= std::min(DstIdx, SrcParts - 1); ++i) {
+      MachineInstrBuilder Mul =
+          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
+      Factors.push_back(Mul.getReg(0));
+    }
+    // Collect high parts of muls from previous DstIdx.
+    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
+         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
+      MachineInstrBuilder Umulh =
+          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
+      Factors.push_back(Umulh.getReg(0));
+    }
+    // Add CarrySum from additions calculated for previous DstIdx.
+    if (DstIdx != 1) {
+      Factors.push_back(CarrySumPrevDstIdx);
+    }
+
+    Register CarrySum;
+    // Add all factors and accumulate all carries into CarrySum.
+    if (DstIdx != DstParts - 1) {
+      MachineInstrBuilder Uaddo =
+          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
+      FactorSum = Uaddo.getReg(0);
+      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
+      for (unsigned i = 2; i < Factors.size(); ++i) {
+        MachineInstrBuilder Uaddo =
+            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
+        FactorSum = Uaddo.getReg(0);
+        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
+        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
+      }
+    } else {
+      // Since value for the next index is not calculated, neither is CarrySum.
+      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
+      for (unsigned i = 2; i < Factors.size(); ++i)
+        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
+    }
+
+    CarrySumPrevDstIdx = CarrySum;
+    DstRegs[DstIdx] = FactorSum;
+    Factors.clear();
+  }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register Src1 = MI.getOperand(1).getReg();
+  Register Src2 = MI.getOperand(2).getReg();
+
+  LLT Ty = MRI.getType(DstReg);
+  if (Ty.isVector())
+    return UnableToLegalize;
+
+  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
+  unsigned DstSize = Ty.getSizeInBits();
+  unsigned NarrowSize = NarrowTy.getSizeInBits();
+  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
+    return UnableToLegalize;
+
+  unsigned NumDstParts = DstSize / NarrowSize;
+  unsigned NumSrcParts = SrcSize / NarrowSize;
+  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
+  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
+
+  SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
+  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
+  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
+  DstTmpRegs.resize(DstTmpParts);
+  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
+
+  // Take only high half of registers if this is high mul.
+  ArrayRef<Register> DstRegs(
+      IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
+  MIRBuilder.buildMerge(DstReg, DstRegs);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
+                                     LLT NarrowTy) {
+  if (TypeIdx != 1)
+    return UnableToLegalize;
+
+  uint64_t NarrowSize = NarrowTy.getSizeInBits();
+
+  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+  // FIXME: add support for when SizeOp1 isn't an exact multiple of
+  // NarrowSize.
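+  // A worked instance of the loop below: a G_EXTRACT of s32 at bit offset 16
+  // from an s128 source split into s32 pieces only touches pieces 0 and 1,
+  // producing two s16 segments (source bits [16,32) and [32,48)) that are
+  // re-merged into the s32 result.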
+ if (SizeOp1 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp1 / NarrowSize; + + SmallVector<Register, 2> SrcRegs, DstRegs; + SmallVector<uint64_t, 2> Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + + Register OpReg = MI.getOperand(0).getReg(); + uint64_t OpStart = MI.getOperand(2).getImm(); + uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); + for (int i = 0; i < NumParts; ++i) { + unsigned SrcStart = i * NarrowSize; + + if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) { + // No part of the extract uses this subregister, ignore it. + continue; + } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) { + // The entire subregister is extracted, forward the value. + DstRegs.push_back(SrcRegs[i]); + continue; + } + + // OpSegStart is where this destination segment would start in OpReg if it + // extended infinitely in both directions. + int64_t ExtractOffset; + uint64_t SegSize; + if (OpStart < SrcStart) { + ExtractOffset = 0; + SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); + } else { + ExtractOffset = OpStart - SrcStart; + SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize); + } + + Register SegReg = SrcRegs[i]; + if (ExtractOffset != 0 || SegSize != NarrowSize) { + // A genuine extract is needed. + SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); + MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); + } + + DstRegs.push_back(SegReg); + } + + Register DstReg = MI.getOperand(0).getReg(); + if(MRI.getType(DstReg).isVector()) + MIRBuilder.buildBuildVector(DstReg, DstRegs); + else + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + // FIXME: Don't know how to handle secondary types yet. + if (TypeIdx != 0) + return UnableToLegalize; + + uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + uint64_t NarrowSize = NarrowTy.getSizeInBits(); + + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + + int NumParts = SizeOp0 / NarrowSize; + + SmallVector<Register, 2> SrcRegs, DstRegs; + SmallVector<uint64_t, 2> Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + + Register OpReg = MI.getOperand(2).getReg(); + uint64_t OpStart = MI.getOperand(3).getImm(); + uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); + for (int i = 0; i < NumParts; ++i) { + unsigned DstStart = i * NarrowSize; + + if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) { + // No part of the insert affects this subregister, forward the original. + DstRegs.push_back(SrcRegs[i]); + continue; + } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) { + // The entire subregister is defined by this insert, forward the new + // value. + DstRegs.push_back(OpReg); + continue; + } + + // OpSegStart is where this destination segment would start in OpReg if it + // extended infinitely in both directions. 
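+  // e.g. inserting an s16 value at bit offset 24 into an s64 split into s32
+  // parts touches both parts: part 0 takes bits [0,8) of the inserted value
+  // at InsertOffset 24, part 1 takes bits [8,16) at InsertOffset 0.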
+ int64_t ExtractOffset, InsertOffset; + uint64_t SegSize; + if (OpStart < DstStart) { + InsertOffset = 0; + ExtractOffset = DstStart - OpStart; + SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart); + } else { + InsertOffset = OpStart - DstStart; + ExtractOffset = 0; + SegSize = + std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart); + } + + Register SegReg = OpReg; + if (ExtractOffset != 0 || SegSize != OpSize) { + // A genuine extract is needed. + SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); + MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset); + } + + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset); + DstRegs.push_back(DstReg); + } + + assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered"); + Register DstReg = MI.getOperand(0).getReg(); + if(MRI.getType(DstReg).isVector()) + MIRBuilder.buildBuildVector(DstReg, DstRegs); + else + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + + assert(MI.getNumOperands() == 3 && TypeIdx == 0); + + SmallVector<Register, 4> DstRegs, DstLeftoverRegs; + SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs; + SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs; + LLT LeftoverTy; + if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy, + Src0Regs, Src0LeftoverRegs)) + return UnableToLegalize; + + LLT Unused; + if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused, + Src1Regs, Src1LeftoverRegs)) + llvm_unreachable("inconsistent extractParts result"); + + for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { + auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, + {Src0Regs[I], Src1Regs[I]}); + DstRegs.push_back(Inst->getOperand(0).getReg()); + } + + for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { + auto Inst = MIRBuilder.buildInstr( + MI.getOpcode(), + {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]}); + DstLeftoverRegs.push_back(Inst->getOperand(0).getReg()); + } + + insertParts(DstReg, DstTy, NarrowTy, DstRegs, + LeftoverTy, DstLeftoverRegs); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register CondReg = MI.getOperand(1).getReg(); + LLT CondTy = MRI.getType(CondReg); + if (CondTy.isVector()) // TODO: Handle vselect + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + + SmallVector<Register, 4> DstRegs, DstLeftoverRegs; + SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs; + SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs; + LLT LeftoverTy; + if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy, + Src1Regs, Src1LeftoverRegs)) + return UnableToLegalize; + + LLT Unused; + if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused, + Src2Regs, Src2LeftoverRegs)) + llvm_unreachable("inconsistent extractParts result"); + + for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { + auto Select = MIRBuilder.buildSelect(NarrowTy, + CondReg, Src1Regs[I], Src2Regs[I]); + DstRegs.push_back(Select->getOperand(0).getReg()); + } + + for (unsigned I = 
0, E = Src1LeftoverRegs.size(); I != E; ++I) { + auto Select = MIRBuilder.buildSelect( + LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]); + DstLeftoverRegs.push_back(Select->getOperand(0).getReg()); + } + + insertParts(DstReg, DstTy, NarrowTy, DstRegs, + LeftoverTy, DstLeftoverRegs); + + MI.eraseFromParent(); + return Legalized; } LegalizerHelper::LegalizeResult @@ -1288,9 +3099,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return Legalized; } case TargetOpcode::G_CTLZ: { - unsigned SrcReg = MI.getOperand(1).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); unsigned Len = Ty.getSizeInBits(); - if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) { + if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) { // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}, {SrcReg}); @@ -1314,7 +3125,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { // return Len - popcount(x); // // Ref: "Hacker's Delight" by Henry Warren - unsigned Op = SrcReg; + Register Op = SrcReg; unsigned NewLen = PowerOf2Ceil(Len); for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) { auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i); @@ -1338,9 +3149,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return Legalized; } case TargetOpcode::G_CTTZ: { - unsigned SrcReg = MI.getOperand(1).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); unsigned Len = Ty.getSizeInBits(); - if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) { + if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) { // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with // zero. auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, @@ -1365,8 +3176,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { TargetOpcode::G_AND, {Ty}, {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty}, {SrcReg, MIBCstNeg1})}); - if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) && - isSupported({TargetOpcode::G_CTLZ, {Ty}})) { + if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) && + isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) { auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); MIRBuilder.buildInstr( TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, @@ -1381,3 +3192,230 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { } } } + +// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float +// representation. +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + const LLT S1 = LLT::scalar(1); + + assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32); + + // unsigned cul2f(ulong u) { + // uint lz = clz(u); + // uint e = (u != 0) ? 127U + 63U - lz : 0; + // u = (u << lz) & 0x7fffffffffffffffUL; + // ulong t = u & 0xffffffffffUL; + // uint v = (e << 23) | (uint)(u >> 40); + // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? 
v & 1U : 0U);
+  //  return as_float(v + r);
+  // }
+
+  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
+  auto Zero64 = MIRBuilder.buildConstant(S64, 0);
+
+  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
+
+  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
+  auto Sub = MIRBuilder.buildSub(S32, K, LZ);
+
+  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
+  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
+
+  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
+  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
+
+  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
+
+  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
+  auto T = MIRBuilder.buildAnd(S64, U, Mask1);
+
+  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
+  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
+  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
+
+  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
+  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
+  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
+  auto One = MIRBuilder.buildConstant(S32, 1);
+
+  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
+  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
+  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
+  MIRBuilder.buildAdd(Dst, V, R);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+
+  if (SrcTy != LLT::scalar(64))
+    return UnableToLegalize;
+
+  if (DstTy == LLT::scalar(32)) {
+    // TODO: SelectionDAG has several alternative expansions to port which may
+    // be more reasonable depending on the available instructions. If a target
+    // has sitofp, does not have CTLZ, or can efficiently use f64 as an
+    // intermediate type, this is probably worse.
+    return lowerU64ToF32BitOps(MI);
+  }
+
+  return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+
+  const LLT S64 = LLT::scalar(64);
+  const LLT S32 = LLT::scalar(32);
+  const LLT S1 = LLT::scalar(1);
+
+  if (SrcTy != S64)
+    return UnableToLegalize;
+
+  if (DstTy == S32) {
+    // signed cl2f(long l) {
+    //   long s = l >> 63;
+    //   float r = cul2f((l + s) ^ s);
+    //   return s ? -r : r;
+    // }
+    Register L = Src;
+    auto SignBit = MIRBuilder.buildConstant(S64, 63);
+    auto S = MIRBuilder.buildAShr(S64, L, SignBit);
+
+    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
+    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
+    auto R = MIRBuilder.buildUITOFP(S32, Xor);
+
+    auto RNeg = MIRBuilder.buildFNeg(S32, R);
+    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
+                                            MIRBuilder.buildConstant(S64, 0));
+    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  return UnableToLegalize;
+}
+
+static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
+  switch (Opc) {
+  case TargetOpcode::G_SMIN:
+    return CmpInst::ICMP_SLT;
+  case TargetOpcode::G_SMAX:
+    return CmpInst::ICMP_SGT;
+  case TargetOpcode::G_UMIN:
+    return CmpInst::ICMP_ULT;
+  case TargetOpcode::G_UMAX:
+    return CmpInst::ICMP_UGT;
+  default:
+    llvm_unreachable("not in integer min/max");
+  }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src0 = MI.getOperand(1).getReg();
+  Register Src1 = MI.getOperand(2).getReg();
+
+  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
+  LLT CmpType = MRI.getType(Dst).changeElementSize(1);
+
+  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
+  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src0 = MI.getOperand(1).getReg();
+  Register Src1 = MI.getOperand(2).getReg();
+
+  const LLT Src0Ty = MRI.getType(Src0);
+  const LLT Src1Ty = MRI.getType(Src1);
+
+  const int Src0Size = Src0Ty.getScalarSizeInBits();
+  const int Src1Size = Src1Ty.getScalarSizeInBits();
+
+  auto SignBitMask = MIRBuilder.buildConstant(
+    Src0Ty, APInt::getSignMask(Src0Size));
+
+  auto NotSignBitMask = MIRBuilder.buildConstant(
+    Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
+
+  auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
+  MachineInstr *Or;
+
+  if (Src0Ty == Src1Ty) {
+    auto And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask);
+    Or = MIRBuilder.buildOr(Dst, And0, And1);
+  } else if (Src0Size > Src1Size) {
+    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
+    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
+    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
+    auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
+    Or = MIRBuilder.buildOr(Dst, And0, And1);
+  } else {
+    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
+    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
+    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
+    auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
+    Or = MIRBuilder.buildOr(Dst, And0, And1);
+  }
+
+  // Be careful about setting nsz/nnan/ninf on every instruction, since the
+  // constants are a nan and -0.0, but the final result should preserve
+  // everything.
+  if (unsigned Flags = MI.getFlags())
+    Or->setFlags(Flags);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
+  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
+    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src0 = MI.getOperand(1).getReg();
+  Register Src1 = MI.getOperand(2).getReg();
+  LLT Ty = MRI.getType(Dst);
+
+  if (!MI.getFlag(MachineInstr::FmNoNans)) {
+    // Insert canonicalizes if it's possible we need to quiet to get correct
+    // sNaN behavior.
+
+    // Note this must be done here, and not as an optimization combine in the
+    // absence of a dedicated quiet-snan instruction as we're using an
+    // omni-purpose G_FCANONICALIZE.
+    if (!isKnownNeverSNaN(Src0, MRI))
+      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
+
+    if (!isKnownNeverSNaN(Src1, MRI))
+      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
+  }
+
+  // If there are no nans, it's safe to simply replace this with the non-IEEE
+  // version.
+  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
+  MI.eraseFromParent();
+  return Legalized;
+}
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index fa36ede5b976..6e1de95b3277 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -1,9 +1,8 @@
 //===- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ---------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,6 +42,45 @@ cl::opt<bool> llvm::DisableGISelLegalityCheck(
     cl::desc("Don't verify that MIR is fully legal between GlobalISel passes"),
     cl::Hidden);
 
+raw_ostream &llvm::operator<<(raw_ostream &OS, LegalizeAction Action) {
+  switch (Action) {
+  case Legal:
+    OS << "Legal";
+    break;
+  case NarrowScalar:
+    OS << "NarrowScalar";
+    break;
+  case WidenScalar:
+    OS << "WidenScalar";
+    break;
+  case FewerElements:
+    OS << "FewerElements";
+    break;
+  case MoreElements:
+    OS << "MoreElements";
+    break;
+  case Lower:
+    OS << "Lower";
+    break;
+  case Libcall:
+    OS << "Libcall";
+    break;
+  case Custom:
+    OS << "Custom";
+    break;
+  case Unsupported:
+    OS << "Unsupported";
+    break;
+  case NotFound:
+    OS << "NotFound";
+    break;
+  case UseLegacyRules:
+    OS << "UseLegacyRules";
+    break;
+  }
+  return OS;
+}
+
 raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
   OS << Opcode << ", Tys={";
   for (const auto &Type : Types) {
@@ -59,6 +97,86 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
   return OS;
 }
 
+#ifndef NDEBUG
+// Make sure the rule won't (trivially) loop forever.
+static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q,
+                             const std::pair<unsigned, LLT> &Mutation) {
+  switch (Rule.getAction()) {
+  case Custom:
+  case Lower:
+  case MoreElements:
+  case FewerElements:
+    break;
+  default:
+    return Q.Types[Mutation.first] != Mutation.second;
+  }
+  return true;
+}
+
+// Make sure the returned mutation makes sense for the match type.
+static bool mutationIsSane(const LegalizeRule &Rule,
+                           const LegalityQuery &Q,
+                           std::pair<unsigned, LLT> Mutation) {
+  // If the user wants a custom mutation, then we can't really say much about
+  // it. Return true, and trust that they're doing the right thing.
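+  // As a concrete sketch of the checks below: a FewerElements rule matched
+  // on <4 x s32> may sanely mutate to <2 x s32> or to plain s32 (element
+  // type preserved, count reduced), but not to <4 x s16> or <8 x s32>.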
+  if (Rule.getAction() == Custom)
+    return true;
+
+  const unsigned TypeIdx = Mutation.first;
+  const LLT OldTy = Q.Types[TypeIdx];
+  const LLT NewTy = Mutation.second;
+
+  switch (Rule.getAction()) {
+  case FewerElements:
+  case MoreElements: {
+    if (!OldTy.isVector())
+      return false;
+
+    if (NewTy.isVector()) {
+      if (Rule.getAction() == FewerElements) {
+        // Make sure the element count really decreased.
+        if (NewTy.getNumElements() >= OldTy.getNumElements())
+          return false;
+      } else {
+        // Make sure the element count really increased.
+        if (NewTy.getNumElements() <= OldTy.getNumElements())
+          return false;
+      }
+    }
+
+    // Make sure the element type didn't change.
+    return NewTy.getScalarType() == OldTy.getElementType();
+  }
+  case NarrowScalar:
+  case WidenScalar: {
+    if (OldTy.isVector()) {
+      // Number of elements should not change.
+      if (!NewTy.isVector() || OldTy.getNumElements() != NewTy.getNumElements())
+        return false;
+    } else {
+      // Both types must be scalars.
+      if (NewTy.isVector())
+        return false;
+    }
+
+    if (Rule.getAction() == NarrowScalar) {
+      // Make sure the size really decreased.
+      if (NewTy.getScalarSizeInBits() >= OldTy.getScalarSizeInBits())
+        return false;
+    } else {
+      // Make sure the size really increased.
+      if (NewTy.getScalarSizeInBits() <= OldTy.getScalarSizeInBits())
+        return false;
+    }
+
+    return true;
+  }
+  default:
+    return true;
+  }
+}
+#endif
+
 LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const {
   LLVM_DEBUG(dbgs() << "Applying legalizer ruleset to: "; Query.print(dbgs());
              dbgs() << "\n");
@@ -66,17 +184,15 @@ LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const {
     LLVM_DEBUG(dbgs() << ".. fallback to legacy rules (no rules defined)\n");
     return {LegalizeAction::UseLegacyRules, 0, LLT{}};
   }
-  for (const auto &Rule : Rules) {
+  for (const LegalizeRule &Rule : Rules) {
     if (Rule.match(Query)) {
       LLVM_DEBUG(dbgs() << ".. match\n");
       std::pair<unsigned, LLT> Mutation = Rule.determineMutation(Query);
-      LLVM_DEBUG(dbgs() << ".. .. " << (unsigned)Rule.getAction() << ", "
+      LLVM_DEBUG(dbgs() << ".. .. " << Rule.getAction() << ", "
                         << Mutation.first << ", " << Mutation.second << "\n");
-      assert((Query.Types[Mutation.first] != Mutation.second ||
-              Rule.getAction() == Lower ||
-              Rule.getAction() == MoreElements ||
-              Rule.getAction() == FewerElements) &&
-             "Simple loop detected");
+      assert(mutationIsSane(Rule, Query, Mutation) &&
+             "legality mutation invalid for match");
+      assert(hasNoSimpleLoops(Rule, Query, Mutation) && "Simple loop detected");
       return {Rule.getAction(), Mutation.first, Mutation.second};
     } else
       LLVM_DEBUG(dbgs() << ".. no match\n");
@@ -180,16 +296,14 @@ void LegalizerInfo::computeTables() {
         if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
             ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
           S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
-        llvm::sort(ScalarSpecifiedActions.begin(),
-                   ScalarSpecifiedActions.end());
+        llvm::sort(ScalarSpecifiedActions);
        checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
         setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
       }
 
       // 2. Handle pointer types
       for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
-        llvm::sort(PointerSpecifiedActions.second.begin(),
-                   PointerSpecifiedActions.second.end());
+        llvm::sort(PointerSpecifiedActions.second);
         checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
         // For pointer types, we assume that there isn't a meaningful way
         // to change the number of bits used in the pointer.
@@ -201,8 +315,7 @@ void LegalizerInfo::computeTables() { // 3. Handle vector types SizeAndActionsVec ElementSizesSeen; for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) { - llvm::sort(VectorSpecifiedActions.second.begin(), - VectorSpecifiedActions.second.end()); + llvm::sort(VectorSpecifiedActions.second); const uint16_t ElementSize = VectorSpecifiedActions.first; ElementSizesSeen.push_back({ElementSize, Legal}); checkPartialSizeAndActionsVector(VectorSpecifiedActions.second); @@ -328,9 +441,8 @@ LegalizerInfo::getAction(const LegalityQuery &Query) const { for (unsigned i = 0; i < Query.Types.size(); ++i) { auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]}); if (Action.first != Legal) { - LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i - << " Action=" << (unsigned)Action.first << ", " - << Action.second << "\n"); + LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Action=" + << Action.first << ", " << Action.second << "\n"); return {Action.first, i, Action.second}; } else LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n"); @@ -364,8 +476,9 @@ LegalizerInfo::getAction(const MachineInstr &MI, SmallVector<LegalityQuery::MemDesc, 2> MemDescrs; for (const auto &MMO : MI.memoperands()) - MemDescrs.push_back( - {MMO->getSize() /* in bytes */ * 8, MMO->getOrdering()}); + MemDescrs.push_back({8 * MMO->getSize() /* in bits */, + 8 * MMO->getAlignment(), + MMO->getOrdering()}); return getAction({MI.getOpcode(), Types, MemDescrs}); } @@ -375,6 +488,14 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI, return getAction(MI, MRI).Action == Legal; } +bool LegalizerInfo::isLegalOrCustom(const MachineInstr &MI, + const MachineRegisterInfo &MRI) const { + auto Action = getAction(MI, MRI).Action; + // If the action is custom, it may not necessarily modify the instruction, + // so we have to assume it's legal. + return Action == Legal || Action == Custom; +} + bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const { @@ -423,14 +544,10 @@ LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) { // Find the last element in Vec that has a bitsize equal to or smaller than // the requested bit size. // That is the element just before the first element that is bigger than Size. - auto VecIt = std::upper_bound( - Vec.begin(), Vec.end(), Size, - [](const uint32_t Size, const SizeAndAction lhs) -> bool { - return Size < lhs.first; - }); - assert(VecIt != Vec.begin() && "Does Vec not start with size 1?"); - --VecIt; - int VecIdx = VecIt - Vec.begin(); + auto It = partition_point( + Vec, [=](const SizeAndAction &A) { return A.first <= Size; }); + assert(It != Vec.begin() && "Does Vec not start with size 1?"); + int VecIdx = It - Vec.begin() - 1; LegalizeAction Action = Vec[VecIdx].second; switch (Action) { @@ -541,6 +658,12 @@ LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const { IntermediateType.getScalarSizeInBits())}; } +bool LegalizerInfo::legalizeIntrinsic(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + return true; +} + /// \pre Type indices of every opcode form a dense set starting from 0. 
 void LegalizerInfo::verify(const MCInstrInfo &MII) const {
 #ifndef NDEBUG
@@ -584,7 +707,8 @@ const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) {
     const MachineRegisterInfo &MRI = MF.getRegInfo();
     for (const MachineBasicBlock &MBB : MF)
       for (const MachineInstr &MI : MBB)
-        if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
+        if (isPreISelGenericOpcode(MI.getOpcode()) &&
+            !MLI->isLegalOrCustom(MI, MRI))
           return &MI;
   }
   return nullptr;
diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp
index 52b340753a50..3592409710a7 100644
--- a/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -1,9 +1,8 @@
 //===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -11,8 +10,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 
@@ -21,17 +20,53 @@ using namespace llvm;
 
 char Localizer::ID = 0;
-INITIALIZE_PASS(Localizer, DEBUG_TYPE,
-                "Move/duplicate certain instructions close to their use", false,
-                false)
+INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE,
+                      "Move/duplicate certain instructions close to their use",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
+                    "Move/duplicate certain instructions close to their use",
+                    false, false)
 
-Localizer::Localizer() : MachineFunctionPass(ID) {
-  initializeLocalizerPass(*PassRegistry::getPassRegistry());
-}
+Localizer::Localizer() : MachineFunctionPass(ID) { }
 
-void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); }
+void Localizer::init(MachineFunction &MF) {
+  MRI = &MF.getRegInfo();
+  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction());
+}
 
 bool Localizer::shouldLocalize(const MachineInstr &MI) {
+  // Assuming a spill and reload of a value has a cost of 1 instruction each,
+  // this helper function computes the maximum number of uses we should consider
+  // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
+  // break even in terms of code size when the original MI has 2 users vs
+  // choosing to potentially spill. Any more than 2 users and we have a net
+  // code size increase. This doesn't take into account register pressure
+  // though.
+  auto maxUses = [](unsigned RematCost) {
+    // A cost of 1 means remats are basically free.
+    if (RematCost == 1)
+      return UINT_MAX;
+    if (RematCost == 2)
+      return 2U;
+
+    // Remat is too expensive, only sink if there's one user.
+    if (RematCost > 2)
+      return 1U;
+    llvm_unreachable("Unexpected remat cost");
+  };
+
+  // Helper to walk through uses and terminate if we've reached a limit. Saves
+  // us spending time traversing uses if all we want to know is if it's >= min.
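+  // e.g. isUsesAtMost(Reg, /*MaxUses=*/2) visits at most two uses and then
+  // reports whether the end of the use list was reached, so a register with
+  // hundreds of uses is rejected after only two iterations.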
+  auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
+    unsigned NumUses = 0;
+    auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end();
+    for (; UI != UE && NumUses < MaxUses; ++UI) {
+      NumUses++;
+    }
+    // If we haven't reached the end yet then there are more than MaxUses users.
+    return UI == UE;
+  };
+
   switch (MI.getOpcode()) {
   default:
     return false;
@@ -40,11 +75,22 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) {
   case TargetOpcode::G_CONSTANT:
   case TargetOpcode::G_FCONSTANT:
   case TargetOpcode::G_FRAME_INDEX:
+  case TargetOpcode::G_INTTOPTR:
     return true;
+  case TargetOpcode::G_GLOBAL_VALUE: {
+    unsigned RematCost = TTI->getGISelRematGlobalCost();
+    unsigned Reg = MI.getOperand(0).getReg();
+    unsigned MaxUses = maxUses(RematCost);
+    if (MaxUses == UINT_MAX)
+      return true; // Remats are "free" so always localize.
+    bool B = isUsesAtMost(Reg, MaxUses);
+    return B;
+  }
   }
 }
 
 void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetTransformInfoWrapperPass>();
   getSelectionDAGFallbackAnalysisUsage(AU);
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -58,6 +104,107 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
   return InsertMBB == Def.getParent();
 }
 
+bool Localizer::localizeInterBlock(MachineFunction &MF,
+                                   LocalizedSetVecT &LocalizedInstrs) {
+  bool Changed = false;
+  DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
+
+  // Since the IRTranslator only emits constants into the entry block, and the
+  // rest of the GISel pipeline generally emits constants close to their users,
+  // we only localize instructions in the entry block here. This might change if
+  // we start doing CSE across blocks.
+  auto &MBB = MF.front();
+  for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) {
+    MachineInstr &MI = *RI;
+    if (!shouldLocalize(MI))
+      continue;
+    LLVM_DEBUG(dbgs() << "Should localize: " << MI);
+    assert(MI.getDesc().getNumDefs() == 1 &&
+           "More than one definition not supported yet");
+    unsigned Reg = MI.getOperand(0).getReg();
+    // Check if all the users of MI are local.
+    // We are going to invalidate the list of use operands, so we
+    // can't use range iterator.
+    for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
+         MOIt != MOItEnd;) {
+      MachineOperand &MOUse = *MOIt++;
+      // Check if the use is already local.
+      MachineBasicBlock *InsertMBB;
+      LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
+                 dbgs() << "Checking use: " << MIUse
+                        << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+      if (isLocalUse(MOUse, MI, InsertMBB))
+        continue;
+      LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
+      Changed = true;
+      auto MBBAndReg = std::make_pair(InsertMBB, Reg);
+      auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
+      if (NewVRegIt == MBBWithLocalDef.end()) {
+        // Create the localized instruction.
+        MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
+        LocalizedInstrs.insert(LocalizedMI);
+        MachineInstr &UseMI = *MOUse.getParent();
+        if (MRI->hasOneUse(Reg) && !UseMI.isPHI())
+          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI);
+        else
+          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
+                            LocalizedMI);
+
+        // Set a new register for the definition.
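+        // (The clone still defines Reg at this point; giving it a fresh vreg
+        // and caching it in MBBWithLocalDef lets later uses in the same block
+        // reuse this one local copy instead of cloning again.)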
+        unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
+        MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
+        LocalizedMI->getOperand(0).setReg(NewReg);
+        NewVRegIt =
+            MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
+        LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
+      }
+      LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
+                        << '\n');
+      // Update the user reg.
+      MOUse.setReg(NewVRegIt->second);
+    }
+  }
+  return Changed;
+}
+
+bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
+  bool Changed = false;
+
+  // For each already-localized instruction which has multiple users, we scan
+  // the block top down from the current position until we hit one of them.
+
+  // FIXME: Consider doing inst duplication if live ranges are very long due to
+  // many users, but this case may be better served by regalloc improvements.
+
+  for (MachineInstr *MI : LocalizedInstrs) {
+    unsigned Reg = MI->getOperand(0).getReg();
+    MachineBasicBlock &MBB = *MI->getParent();
+    // All of the user MIs of this reg.
+    SmallPtrSet<MachineInstr *, 32> Users;
+    for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
+      if (!UseMI.isPHI())
+        Users.insert(&UseMI);
+    }
+    // If all the users were PHIs then they're not going to be in our block,
+    // don't try to move this instruction.
+    if (Users.empty())
+      continue;
+
+    MachineBasicBlock::iterator II(MI);
+    ++II;
+    while (II != MBB.end() && !Users.count(&*II))
+      ++II;
+
+    LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II
+                      << "\n");
+    assert(II != MBB.end() && "Didn't find the user in the MBB");
+    MI->removeFromParent();
+    MBB.insert(II, MI);
+    Changed = true;
+  }
+  return Changed;
+}
+
 bool Localizer::runOnMachineFunction(MachineFunction &MF) {
   // If the ISel pipeline failed, do not bother running that pass.
   if (MF.getProperties().hasProperty(
@@ -68,62 +215,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
 
   init(MF);
 
-  bool Changed = false;
-  // Keep track of the instructions we localized.
-  // We won't need to process them if we see them later in the CFG.
-  SmallPtrSet<MachineInstr *, 16> LocalizedInstrs;
-  DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
-  // TODO: Do bottom up traversal.
-  for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
-      if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
-        continue;
-      LLVM_DEBUG(dbgs() << "Should localize: " << MI);
-      assert(MI.getDesc().getNumDefs() == 1 &&
-             "More than one definition not supported yet");
-      unsigned Reg = MI.getOperand(0).getReg();
-      // Check if all the users of MI are local.
-      // We are going to invalidation the list of use operands, so we
-      // can't use range iterator.
-      for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
-           MOIt != MOItEnd;) {
-        MachineOperand &MOUse = *MOIt++;
-        // Check if the use is already local.
-        MachineBasicBlock *InsertMBB;
-        LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
-                   dbgs() << "Checking use: " << MIUse
-                          << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
-        if (isLocalUse(MOUse, MI, InsertMBB))
-          continue;
-        LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
-        Changed = true;
-        auto MBBAndReg = std::make_pair(InsertMBB, Reg);
-        auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
-        if (NewVRegIt == MBBWithLocalDef.end()) {
-          // Create the localized instruction.
- MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI); - LocalizedInstrs.insert(LocalizedMI); - // Don't try to be smart for the insertion point. - // There is no guarantee that the first seen use is the first - // use in the block. - InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()), - LocalizedMI); + // Keep track of the instructions we localized. We'll do a second pass of + // intra-block localization to further reduce live ranges. + LocalizedSetVecT LocalizedInstrs; - // Set a new register for the definition. - unsigned NewReg = - MRI->createGenericVirtualRegister(MRI->getType(Reg)); - MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); - LocalizedMI->getOperand(0).setReg(NewReg); - NewVRegIt = - MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; - LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI); - } - LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) - << '\n'); - // Update the user reg. - MOUse.setReg(NewVRegIt->second); - } - } - } - return Changed; + bool Changed = localizeInterBlock(MF, LocalizedInstrs); + return Changed |= localizeIntraBlock(LocalizedInstrs); } diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 1f5611061994..b7a73326b85c 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/GlobalISel/MachineIRBuilder.cpp - MIBuilder--*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -17,6 +16,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugInfo.h" @@ -87,7 +87,7 @@ MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) { } MachineInstrBuilder -MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable, +MachineIRBuilder::buildDirectDbgValue(Register Reg, const MDNode *Variable, const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); @@ -100,7 +100,7 @@ MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable, } MachineInstrBuilder -MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable, +MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable, const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); @@ -160,23 +160,32 @@ MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { return MIB.addMetadata(Label); } -MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) { - assert(getMRI()->getType(Res).isPointer() && "invalid operand type"); - return buildInstr(TargetOpcode::G_FRAME_INDEX) - .addDef(Res) - .addFrameIndex(Idx); +MachineInstrBuilder MachineIRBuilder::buildFrameIndex(const DstOp &Res, + int Idx) { + assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type"); + auto MIB = buildInstr(TargetOpcode::G_FRAME_INDEX); + Res.addDefToMIB(*getMRI(), MIB); + MIB.addFrameIndex(Idx); + return MIB; } -MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res, +MachineInstrBuilder MachineIRBuilder::buildGlobalValue(const DstOp &Res, const GlobalValue *GV) { - assert(getMRI()->getType(Res).isPointer() && "invalid operand type"); - assert(getMRI()->getType(Res).getAddressSpace() == + assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type"); + assert(Res.getLLTTy(*getMRI()).getAddressSpace() == GV->getType()->getAddressSpace() && "address space mismatch"); - return buildInstr(TargetOpcode::G_GLOBAL_VALUE) - .addDef(Res) - .addGlobalAddress(GV); + auto MIB = buildInstr(TargetOpcode::G_GLOBAL_VALUE); + Res.addDefToMIB(*getMRI(), MIB); + MIB.addGlobalAddress(GV); + return MIB; +} + +MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy, + unsigned JTI) { + return buildInstr(TargetOpcode::G_JUMP_TABLE, {PtrTy}, {}) + .addJumpTableIndex(JTI); } void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0, @@ -185,20 +194,28 @@ void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0, assert((Res == Op0 && Res == Op1) && "type mismatch"); } -MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, - unsigned Op1) { - assert(getMRI()->getType(Res).isPointer() && - getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch"); - assert(getMRI()->getType(Op1).isScalar() && "invalid offset type"); +void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0, + const LLT &Op1) { + assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); + assert((Res == Op0) 
&& "type mismatch"); +} + +MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res, + const SrcOp &Op0, + const SrcOp &Op1) { + assert(Res.getLLTTy(*getMRI()).isPointer() && + Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); + assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type"); - return buildInstr(TargetOpcode::G_GEP) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); + auto MIB = buildInstr(TargetOpcode::G_GEP); + Res.addDefToMIB(*getMRI(), MIB); + Op0.addSrcToMIB(MIB); + Op1.addSrcToMIB(MIB); + return MIB; } Optional<MachineInstrBuilder> -MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0, +MachineIRBuilder::materializeGEP(Register &Res, Register Op0, const LLT &ValueTy, uint64_t Value) { assert(Res == 0 && "Res is a result argument"); assert(ValueTy.isScalar() && "invalid offset type"); @@ -209,32 +226,43 @@ MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0, } Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0)); - unsigned TmpReg = getMRI()->createGenericVirtualRegister(ValueTy); - - buildConstant(TmpReg, Value); - return buildGEP(Res, Op0, TmpReg); + auto Cst = buildConstant(ValueTy, Value); + return buildGEP(Res, Op0, Cst.getReg(0)); } -MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, +MachineInstrBuilder MachineIRBuilder::buildPtrMask(const DstOp &Res, + const SrcOp &Op0, uint32_t NumBits) { - assert(getMRI()->getType(Res).isPointer() && - getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch"); + assert(Res.getLLTTy(*getMRI()).isPointer() && + Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); - return buildInstr(TargetOpcode::G_PTR_MASK) - .addDef(Res) - .addUse(Op0) - .addImm(NumBits); + auto MIB = buildInstr(TargetOpcode::G_PTR_MASK); + Res.addDefToMIB(*getMRI(), MIB); + Op0.addSrcToMIB(MIB); + MIB.addImm(NumBits); + return MIB; } MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { return buildInstr(TargetOpcode::G_BR).addMBB(&Dest); } -MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) { +MachineInstrBuilder MachineIRBuilder::buildBrIndirect(Register Tgt) { assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination"); return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt); } +MachineInstrBuilder MachineIRBuilder::buildBrJT(Register TablePtr, + unsigned JTI, + Register IndexReg) { + assert(getMRI()->getType(TablePtr).isPointer() && + "Table reg must be a pointer"); + return buildInstr(TargetOpcode::G_BRJT) + .addUse(TablePtr) + .addJumpTableIndex(JTI) + .addUse(IndexReg); +} + MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res, const SrcOp &Op) { return buildInstr(TargetOpcode::COPY, Res, Op); @@ -243,36 +271,60 @@ MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res, MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, const ConstantInt &Val) { LLT Ty = Res.getLLTTy(*getMRI()); + LLT EltTy = Ty.getScalarType(); + assert(EltTy.getScalarSizeInBits() == Val.getBitWidth() && + "creating constant with the wrong size"); + + if (Ty.isVector()) { + auto Const = buildInstr(TargetOpcode::G_CONSTANT) + .addDef(getMRI()->createGenericVirtualRegister(EltTy)) + .addCImm(&Val); + return buildSplatVector(Res, Const); + } - assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type"); - - const ConstantInt *NewVal = &Val; - if (Ty.getSizeInBits() != Val.getBitWidth()) - NewVal = ConstantInt::get(getMF().getFunction().getContext(), - 
Val.getValue().sextOrTrunc(Ty.getSizeInBits())); - - auto MIB = buildInstr(TargetOpcode::G_CONSTANT); - Res.addDefToMIB(*getMRI(), MIB); - MIB.addCImm(NewVal); - return MIB; + auto Const = buildInstr(TargetOpcode::G_CONSTANT); + Res.addDefToMIB(*getMRI(), Const); + Const.addCImm(&Val); + return Const; } MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, int64_t Val) { auto IntN = IntegerType::get(getMF().getFunction().getContext(), - Res.getLLTTy(*getMRI()).getSizeInBits()); + Res.getLLTTy(*getMRI()).getScalarSizeInBits()); ConstantInt *CI = ConstantInt::get(IntN, Val, true); return buildConstant(Res, *CI); } MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, const ConstantFP &Val) { - assert(Res.getLLTTy(*getMRI()).isScalar() && "invalid operand type"); + LLT Ty = Res.getLLTTy(*getMRI()); + LLT EltTy = Ty.getScalarType(); - auto MIB = buildInstr(TargetOpcode::G_FCONSTANT); - Res.addDefToMIB(*getMRI(), MIB); - MIB.addFPImm(&Val); - return MIB; + assert(APFloat::getSizeInBits(Val.getValueAPF().getSemantics()) + == EltTy.getSizeInBits() && + "creating fconstant with the wrong size"); + + assert(!Ty.isPointer() && "invalid operand type"); + + if (Ty.isVector()) { + auto Const = buildInstr(TargetOpcode::G_FCONSTANT) + .addDef(getMRI()->createGenericVirtualRegister(EltTy)) + .addFPImm(&Val); + + return buildSplatVector(Res, Const); + } + + auto Const = buildInstr(TargetOpcode::G_FCONSTANT); + Res.addDefToMIB(*getMRI(), Const); + Const.addFPImm(&Val); + return Const; +} + +MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, + const APInt &Val) { + ConstantInt *CI = ConstantInt::get(getMF().getFunction().getContext(), Val); + return buildConstant(Res, *CI); } MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, @@ -280,44 +332,62 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, LLT DstTy = Res.getLLTTy(*getMRI()); auto &Ctx = getMF().getFunction().getContext(); auto *CFP = - ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getSizeInBits())); + ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getScalarSizeInBits())); return buildFConstant(Res, *CFP); } -MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst, +MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, + const APFloat &Val) { + auto &Ctx = getMF().getFunction().getContext(); + auto *CFP = ConstantFP::get(Ctx, Val); + return buildFConstant(Res, *CFP); +} + +MachineInstrBuilder MachineIRBuilder::buildBrCond(Register Tst, MachineBasicBlock &Dest) { assert(getMRI()->getType(Tst).isScalar() && "invalid operand type"); return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest); } -MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr, +MachineInstrBuilder MachineIRBuilder::buildLoad(const DstOp &Res, + const SrcOp &Addr, MachineMemOperand &MMO) { return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO); } MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode, - unsigned Res, - unsigned Addr, + const DstOp &Res, + const SrcOp &Addr, MachineMemOperand &MMO) { - assert(getMRI()->getType(Res).isValid() && "invalid operand type"); - assert(getMRI()->getType(Addr).isPointer() && "invalid operand type"); + assert(Res.getLLTTy(*getMRI()).isValid() && "invalid operand type"); + assert(Addr.getLLTTy(*getMRI()).isPointer() && "invalid operand type"); - return buildInstr(Opcode) - .addDef(Res) - .addUse(Addr) - .addMemOperand(&MMO); + auto MIB = buildInstr(Opcode); + 
Res.addDefToMIB(*getMRI(), MIB); + Addr.addSrcToMIB(MIB); + MIB.addMemOperand(&MMO); + return MIB; } -MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr, +MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val, + const SrcOp &Addr, MachineMemOperand &MMO) { - assert(getMRI()->getType(Val).isValid() && "invalid operand type"); - assert(getMRI()->getType(Addr).isPointer() && "invalid operand type"); + assert(Val.getLLTTy(*getMRI()).isValid() && "invalid operand type"); + assert(Addr.getLLTTy(*getMRI()).isPointer() && "invalid operand type"); - return buildInstr(TargetOpcode::G_STORE) - .addUse(Val) - .addUse(Addr) - .addMemOperand(&MMO); + auto MIB = buildInstr(TargetOpcode::G_STORE); + Val.addSrcToMIB(MIB); + Addr.addSrcToMIB(MIB); + MIB.addMemOperand(&MMO); + return MIB; +} + +MachineInstrBuilder MachineIRBuilder::buildUAddo(const DstOp &Res, + const DstOp &CarryOut, + const SrcOp &Op0, + const SrcOp &Op1) { + return buildInstr(TargetOpcode::G_UADDO, {Res, CarryOut}, {Op0, Op1}); } MachineInstrBuilder MachineIRBuilder::buildUAdde(const DstOp &Res, @@ -344,6 +414,25 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(const DstOp &Res, return buildInstr(TargetOpcode::G_ZEXT, Res, Op); } +unsigned MachineIRBuilder::getBoolExtOp(bool IsVec, bool IsFP) const { + const auto *TLI = getMF().getSubtarget().getTargetLowering(); + switch (TLI->getBooleanContents(IsVec, IsFP)) { + case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + return TargetOpcode::G_SEXT; + case TargetLoweringBase::ZeroOrOneBooleanContent: + return TargetOpcode::G_ZEXT; + default: + return TargetOpcode::G_ANYEXT; + } +} + +MachineInstrBuilder MachineIRBuilder::buildBoolExt(const DstOp &Res, + const SrcOp &Op, + bool IsFP) { + unsigned ExtOp = getBoolExtOp(getMRI()->getType(Op.getReg()).isVector(), IsFP); + return buildInstr(ExtOp, Res, Op); +} + MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op) { @@ -403,29 +492,32 @@ MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst, return buildInstr(Opcode, Dst, Src); } -MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src, +MachineInstrBuilder MachineIRBuilder::buildExtract(const DstOp &Dst, + const SrcOp &Src, uint64_t Index) { + LLT SrcTy = Src.getLLTTy(*getMRI()); + LLT DstTy = Dst.getLLTTy(*getMRI()); + #ifndef NDEBUG - assert(getMRI()->getType(Src).isValid() && "invalid operand type"); - assert(getMRI()->getType(Res).isValid() && "invalid operand type"); - assert(Index + getMRI()->getType(Res).getSizeInBits() <= - getMRI()->getType(Src).getSizeInBits() && + assert(SrcTy.isValid() && "invalid operand type"); + assert(DstTy.isValid() && "invalid operand type"); + assert(Index + DstTy.getSizeInBits() <= SrcTy.getSizeInBits() && "extracting off end of register"); #endif - if (getMRI()->getType(Res).getSizeInBits() == - getMRI()->getType(Src).getSizeInBits()) { + if (DstTy.getSizeInBits() == SrcTy.getSizeInBits()) { assert(Index == 0 && "insertion past the end of a register"); - return buildCast(Res, Src); + return buildCast(Dst, Src); } - return buildInstr(TargetOpcode::G_EXTRACT) - .addDef(Res) - .addUse(Src) - .addImm(Index); + auto Extract = buildInstr(TargetOpcode::G_EXTRACT); + Dst.addDefToMIB(*getMRI(), Extract); + Src.addSrcToMIB(Extract); + Extract.addImm(Index); + return Extract; } -void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, +void MachineIRBuilder::buildSequence(Register Res, ArrayRef<Register> Ops, 
ArrayRef<uint64_t> Indices) { #ifndef NDEBUG assert(Ops.size() == Indices.size() && "incompatible args"); @@ -454,11 +546,11 @@ void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, return; } - unsigned ResIn = getMRI()->createGenericVirtualRegister(ResTy); + Register ResIn = getMRI()->createGenericVirtualRegister(ResTy); buildUndef(ResIn); for (unsigned i = 0; i < Ops.size(); ++i) { - unsigned ResOut = i + 1 == Ops.size() + Register ResOut = i + 1 == Ops.size() ? Res : getMRI()->createGenericVirtualRegister(ResTy); buildInsert(ResOut, ResIn, Ops[i], Indices[i]); @@ -471,11 +563,12 @@ MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) { } MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res, - ArrayRef<unsigned> Ops) { + ArrayRef<Register> Ops) { // Unfortunately to convert from ArrayRef<LLT> to ArrayRef<SrcOp>, // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end()); + assert(TmpVec.size() > 1); return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec); } @@ -485,31 +578,48 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res, // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end()); + assert(TmpVec.size() > 1); return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op); } -MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res, +MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res, + const SrcOp &Op) { + unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits(); + SmallVector<Register, 8> TmpVec; + for (unsigned I = 0; I != NumReg; ++I) + TmpVec.push_back(getMRI()->createGenericVirtualRegister(Res)); + return buildUnmerge(TmpVec, Op); +} + +MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res, const SrcOp &Op) { - // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<DstOp>, + // Unfortunately to convert from ArrayRef<Register> to ArrayRef<DstOp>, // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end()); + assert(TmpVec.size() > 1); return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op); } MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res, - ArrayRef<unsigned> Ops) { - // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>, + ArrayRef<Register> Ops) { + // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>, // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. 
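// [Editor's note: illustrative aside, not part of the patch.] The merge,
// unmerge and build-vector builders now take ArrayRef<Register> and assert
// on fewer than two parts, and the new LLT-based buildUnmerge sizes its
// result list automatically. A hedged usage sketch, where Src64 is a
// hypothetical s64 vreg:
auto Parts = MIRBuilder.buildUnmerge(LLT::scalar(32), Src64); // two s32 defs
auto Whole = MIRBuilder.buildMerge(LLT::scalar(64),
                                   {Parts.getReg(0), Parts.getReg(1)});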
SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end()); return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } +MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, + const SrcOp &Src) { + SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src); + return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); +} + MachineInstrBuilder MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res, - ArrayRef<unsigned> Ops) { - // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>, + ArrayRef<Register> Ops) { + // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>, // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end()); @@ -517,16 +627,16 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res, } MachineInstrBuilder -MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<unsigned> Ops) { - // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>, +MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) { + // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>, // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end()); return buildInstr(TargetOpcode::G_CONCAT_VECTORS, Res, TmpVec); } -MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src, - unsigned Op, unsigned Index) { +MachineInstrBuilder MachineIRBuilder::buildInsert(Register Res, Register Src, + Register Op, unsigned Index) { assert(Index + getMRI()->getType(Op).getSizeInBits() <= getMRI()->getType(Res).getSizeInBits() && "insertion past the end of a register"); @@ -544,13 +654,25 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src, } MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, - unsigned Res, + ArrayRef<Register> ResultRegs, bool HasSideEffects) { auto MIB = buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS : TargetOpcode::G_INTRINSIC); - if (Res) - MIB.addDef(Res); + for (unsigned ResultReg : ResultRegs) + MIB.addDef(ResultReg); + MIB.addIntrinsicID(ID); + return MIB; +} + +MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, + ArrayRef<DstOp> Results, + bool HasSideEffects) { + auto MIB = + buildInstr(HasSideEffects ? 
TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS + : TargetOpcode::G_INTRINSIC); + for (DstOp Result : Results) + Result.addDefToMIB(*getMRI(), MIB); MIB.addIntrinsicID(ID); return MIB; } @@ -601,8 +723,8 @@ MachineIRBuilder::buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, } MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess( - unsigned OldValRes, unsigned SuccessRes, unsigned Addr, unsigned CmpVal, - unsigned NewVal, MachineMemOperand &MMO) { + Register OldValRes, Register SuccessRes, Register Addr, Register CmpVal, + Register NewVal, MachineMemOperand &MMO) { #ifndef NDEBUG LLT OldValResTy = getMRI()->getType(OldValRes); LLT SuccessResTy = getMRI()->getType(SuccessRes); @@ -628,8 +750,8 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess( } MachineInstrBuilder -MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, - unsigned CmpVal, unsigned NewVal, +MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr, + Register CmpVal, Register NewVal, MachineMemOperand &MMO) { #ifndef NDEBUG LLT OldValResTy = getMRI()->getType(OldValRes); @@ -653,9 +775,9 @@ MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, } MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode, - unsigned OldValRes, - unsigned Addr, - unsigned Val, + Register OldValRes, + Register Addr, + Register Val, MachineMemOperand &MMO) { #ifndef NDEBUG LLT OldValResTy = getMRI()->getType(OldValRes); @@ -675,75 +797,82 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode, } MachineInstrBuilder -MachineIRBuilder::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWXchg(Register OldValRes, Register Addr, + Register Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilder::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWAdd(Register OldValRes, Register Addr, + Register Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilder::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWSub(Register OldValRes, Register Addr, + Register Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilder::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWAnd(Register OldValRes, Register Addr, + Register Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilder::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWNand(Register OldValRes, Register Addr, + Register Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val, MMO); } -MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(unsigned OldValRes, - unsigned Addr, - unsigned Val, +MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(Register OldValRes, + Register Addr, + Register Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, 
Val, MMO); } MachineInstrBuilder -MachineIRBuilder::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWXor(Register OldValRes, Register Addr, + Register Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilder::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWMax(Register OldValRes, Register Addr, + Register Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilder::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWMin(Register OldValRes, Register Addr, + Register Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilder::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWUmax(Register OldValRes, Register Addr, + Register Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilder::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr, - unsigned Val, MachineMemOperand &MMO) { +MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr, + Register Val, MachineMemOperand &MMO) { return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val, MMO); } MachineInstrBuilder -MachineIRBuilder::buildBlockAddress(unsigned Res, const BlockAddress *BA) { +MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) { + return buildInstr(TargetOpcode::G_FENCE) + .addImm(Ordering) + .addImm(Scope); +} + +MachineInstrBuilder +MachineIRBuilder::buildBlockAddress(Register Res, const BlockAddress *BA) { #ifndef NDEBUG assert(getMRI()->getType(Res).isPointer() && "invalid res type"); #endif @@ -803,17 +932,18 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, } case TargetOpcode::G_ADD: case TargetOpcode::G_AND: - case TargetOpcode::G_ASHR: - case TargetOpcode::G_LSHR: case TargetOpcode::G_MUL: case TargetOpcode::G_OR: - case TargetOpcode::G_SHL: case TargetOpcode::G_SUB: case TargetOpcode::G_XOR: case TargetOpcode::G_UDIV: case TargetOpcode::G_SDIV: case TargetOpcode::G_UREM: - case TargetOpcode::G_SREM: { + case TargetOpcode::G_SREM: + case TargetOpcode::G_SMIN: + case TargetOpcode::G_SMAX: + case TargetOpcode::G_UMIN: + case TargetOpcode::G_UMAX: { // All these are binary ops. 
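// [Editor's note: illustrative aside, not part of the patch.] G_SMIN,
// G_SMAX, G_UMIN and G_UMAX are now validated here like any other binary
// op, so they can be emitted through the list-based buildInstr entry
// point. A hedged usage sketch, where LHS and RHS are hypothetical s32
// vregs:
LLT S32 = LLT::scalar(32);
auto Min = MIRBuilder.buildInstr(TargetOpcode::G_SMIN, {S32}, {LHS, RHS});
auto Max = MIRBuilder.buildInstr(TargetOpcode::G_UMAX, {S32}, {LHS, RHS});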
assert(DstOps.size() == 1 && "Invalid Dst"); assert(SrcOps.size() == 2 && "Invalid Srcs"); @@ -821,6 +951,17 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[0].getLLTTy(*getMRI()), SrcOps[1].getLLTTy(*getMRI())); break; + } + case TargetOpcode::G_SHL: + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: { + assert(DstOps.size() == 1 && "Invalid Dst"); + assert(SrcOps.size() == 2 && "Invalid Srcs"); + validateShiftOp(DstOps[0].getLLTTy(*getMRI()), + SrcOps[0].getLLTTy(*getMRI()), + SrcOps[1].getLLTTy(*getMRI())); + break; + } case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: @@ -830,7 +971,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[0].getLLTTy(*getMRI()), true); break; case TargetOpcode::G_TRUNC: - case TargetOpcode::G_FPTRUNC: + case TargetOpcode::G_FPTRUNC: { assert(DstOps.size() == 1 && "Invalid Dst"); assert(SrcOps.size() == 1 && "Invalid Srcs"); validateTruncExt(DstOps[0].getLLTTy(*getMRI()), @@ -839,10 +980,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, } case TargetOpcode::COPY: assert(DstOps.size() == 1 && "Invalid Dst"); - assert(SrcOps.size() == 1 && "Invalid Srcs"); - assert(DstOps[0].getLLTTy(*getMRI()) == LLT() || - SrcOps[0].getLLTTy(*getMRI()) == LLT() || - DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI())); + // If the caller wants to add a subreg source it has to be done separately + // so we may not have any SrcOps at this point yet. break; case TargetOpcode::G_FCMP: case TargetOpcode::G_ICMP: { @@ -943,7 +1082,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, "type mismatch in input list"); assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && - "input scalars do not exactly cover the outpur vector register"); + "input scalars do not exactly cover the output vector register"); break; } case TargetOpcode::G_BUILD_VECTOR_TRUNC: { @@ -976,7 +1115,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, "type mismatch in input list"); assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && - "input vectors do not exactly cover the outpur vector register"); + "input vectors do not exactly cover the output vector register"); break; } case TargetOpcode::G_UADDE: { diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index dcc8b7cc23c5..42be88fcf947 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -1,9 +1,8 @@ //==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -72,7 +71,6 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, RegBankSelect::RegBankSelect(Mode RunningMode) : MachineFunctionPass(ID), OptMode(RunningMode) { - initializeRegBankSelectPass(*PassRegistry::getPassRegistry()); if (RegBankSelectMode.getNumOccurrences() != 0) { OptMode = RegBankSelectMode; if (RegBankSelectMode != RunningMode) @@ -110,7 +108,7 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const { } bool RegBankSelect::assignmentMatch( - unsigned Reg, const RegisterBankInfo::ValueMapping &ValMapping, + Register Reg, const RegisterBankInfo::ValueMapping &ValMapping, bool &OnlyAssign) const { // By default we assume we will have to repair something. OnlyAssign = false; @@ -135,34 +133,84 @@ bool RegBankSelect::assignmentMatch( bool RegBankSelect::repairReg( MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping, RegBankSelect::RepairingPlacement &RepairPt, - const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) { - if (ValMapping.NumBreakDowns != 1 && !TPC->isGlobalISelAbortEnabled()) - return false; - assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented"); + const iterator_range<SmallVectorImpl<Register>::const_iterator> &NewVRegs) { + + assert(ValMapping.NumBreakDowns == (unsigned)size(NewVRegs) && + "need new vreg for each breakdown"); + // An empty range of new register means no repairing. assert(!empty(NewVRegs) && "We should not have to repair"); - // Assume we are repairing a use and thus, the original reg will be - // the source of the repairing. - unsigned Src = MO.getReg(); - unsigned Dst = *NewVRegs.begin(); - - // If we repair a definition, swap the source and destination for - // the repairing. - if (MO.isDef()) - std::swap(Src, Dst); - - assert((RepairPt.getNumInsertPoints() == 1 || - TargetRegisterInfo::isPhysicalRegister(Dst)) && - "We are about to create several defs for Dst"); - - // Build the instruction used to repair, then clone it at the right - // places. Avoiding buildCopy bypasses the check that Src and Dst have the - // same types because the type is a placeholder when this function is called. - MachineInstr *MI = - MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src); - LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst) - << '\n'); + MachineInstr *MI; + if (ValMapping.NumBreakDowns == 1) { + // Assume we are repairing a use and thus, the original reg will be + // the source of the repairing. + Register Src = MO.getReg(); + Register Dst = *NewVRegs.begin(); + + // If we repair a definition, swap the source and destination for + // the repairing. + if (MO.isDef()) + std::swap(Src, Dst); + + assert((RepairPt.getNumInsertPoints() == 1 || + TargetRegisterInfo::isPhysicalRegister(Dst)) && + "We are about to create several defs for Dst"); + + // Build the instruction used to repair, then clone it at the right + // places. Avoiding buildCopy bypasses the check that Src and Dst have the + // same types because the type is a placeholder when this function is called. + MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY) + .addDef(Dst) + .addUse(Src); + LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst) + << '\n'); + } else { + // TODO: Support with G_IMPLICIT_DEF + G_INSERT sequence or G_EXTRACT + // sequence. 
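// [Editor's note: illustrative aside, not part of the patch.] For a
// uniform multi-part breakdown, the repair built below boils down to a
// merge on definitions and an unmerge on uses. A hedged sketch for a
// 64-bit value split into two 32-bit parts, where Lo and Hi are
// hypothetical new vregs:
if (MO.isDef()) {
  // %orig:(s64) = G_MERGE_VALUES %lo:(s32), %hi:(s32)
  MIRBuilder.buildMerge(MO.getReg(), {Lo, Hi});
} else {
  // %lo:(s32), %hi:(s32) = G_UNMERGE_VALUES %orig:(s64)
  MIRBuilder.buildUnmerge({Lo, Hi}, MO.getReg());
}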
+ assert(ValMapping.partsAllUniform() && "irregular breakdowns not supported"); + + LLT RegTy = MRI->getType(MO.getReg()); + if (MO.isDef()) { + unsigned MergeOp; + if (RegTy.isVector()) { + if (ValMapping.NumBreakDowns == RegTy.getNumElements()) + MergeOp = TargetOpcode::G_BUILD_VECTOR; + else { + assert( + (ValMapping.BreakDown[0].Length * ValMapping.NumBreakDowns == + RegTy.getSizeInBits()) && + (ValMapping.BreakDown[0].Length % RegTy.getScalarSizeInBits() == + 0) && + "don't understand this value breakdown"); + + MergeOp = TargetOpcode::G_CONCAT_VECTORS; + } + } else + MergeOp = TargetOpcode::G_MERGE_VALUES; + + auto MergeBuilder = + MIRBuilder.buildInstrNoInsert(MergeOp) + .addDef(MO.getReg()); + + for (Register SrcReg : NewVRegs) + MergeBuilder.addUse(SrcReg); + + MI = MergeBuilder; + } else { + MachineInstrBuilder UnMergeBuilder = + MIRBuilder.buildInstrNoInsert(TargetOpcode::G_UNMERGE_VALUES); + for (Register DefReg : NewVRegs) + UnMergeBuilder.addDef(DefReg); + + UnMergeBuilder.addUse(MO.getReg()); + MI = UnMergeBuilder; + } + } + + if (RepairPt.getNumInsertPoints() != 1) + report_fatal_error("need testcase to support multiple insertion points"); + // TODO: // Check if MI is legal. if not, we need to legalize all the // instructions we are going to insert. @@ -195,7 +243,8 @@ uint64_t RegBankSelect::getRepairCost( const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI); // If MO does not have a register bank, we should have just been // able to set one unless we have to break the value down. - assert((!IsSameNumOfValues || CurRegBank) && "We should not have to repair"); + assert(CurRegBank || MO.isDef()); + // Def: Val <- NewDefs // Same number of values: copy // Different number: Val = build_sequence Defs1, Defs2, ... @@ -206,6 +255,9 @@ uint64_t RegBankSelect::getRepairCost( // We should remember that this value is available somewhere else to // coalesce the value. + if (ValMapping.NumBreakDowns != 1) + return RBI->getBreakDownCost(ValMapping, CurRegBank); + if (IsSameNumOfValues) { const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank; // If we repair a definition, swap the source and destination for @@ -345,7 +397,7 @@ void RegBankSelect::tryAvoidingSplit( // repairing. // Check if this is a physical or virtual register. - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // We are going to split every outgoing edges. // Check that this is possible. 
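// [Editor's note: illustrative aside, not part of the patch.] The
// partsAllUniform() guard used by repairReg() above, and defined in
// RegisterBankInfo.cpp further down, accepts only breakdowns whose parts
// all share one length and one register bank. An equivalent hedged
// sketch:
static bool allPartsUniform(ArrayRef<RegisterBankInfo::PartialMapping> Parts) {
  if (Parts.size() < 2)
    return true; // Zero or one part is trivially uniform.
  const auto &First = Parts.front();
  return llvm::all_of(Parts.drop_front(),
                      [&](const RegisterBankInfo::PartialMapping &P) {
                        return P.Length == First.Length &&
                               P.RegBank == First.RegBank;
                      });
}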
@@ -416,7 +468,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( const MachineOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; LLVM_DEBUG(dbgs() << "Opd" << OpIdx << '\n'); @@ -542,7 +594,7 @@ bool RegBankSelect::applyMapping( MachineOperand &MO = MI.getOperand(OpIdx); const RegisterBankInfo::ValueMapping &ValMapping = InstrMapping.getOperandMapping(OpIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); switch (RepairPt.getKind()) { case RepairingPlacement::Reassign: @@ -605,7 +657,7 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n'); const Function &F = MF.getFunction(); Mode SaveOptMode = OptMode; - if (F.hasFnAttribute(Attribute::OptimizeNone)) + if (F.hasOptNone()) OptMode = Mode::Fast; init(MF); @@ -644,8 +696,21 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { "unable to map instruction", MI); return false; } + + // It's possible the mapping changed control flow, and moved the following + // instruction to a new block, so figure out the new parent. + if (MII != End) { + MachineBasicBlock *NextInstBB = MII->getParent(); + if (NextInstBB != MBB) { + LLVM_DEBUG(dbgs() << "Instruction mapping changed control flow\n"); + MBB = NextInstBB; + MIRBuilder.setMBB(*MBB); + End = MBB->end(); + } + } } } + OptMode = SaveOptMode; return false; } @@ -692,7 +757,7 @@ RegBankSelect::RepairingPlacement::RepairingPlacement( MachineBasicBlock &Pred = *MI.getOperand(OpIdx + 1).getMBB(); // Check if we can move the insertion point prior to the // terminators of the predecessor. - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); MachineBasicBlock::iterator It = Pred.getLastNonDebugInstr(); for (auto Begin = Pred.begin(); It != Begin && It->isTerminator(); --It) if (It->modifiesRegister(Reg, &TRI)) { diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp index 16f67a217ce1..4e41f338934d 100644 --- a/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/GlobalISel/RegisterBank.cpp - Register Bank --*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 28404e52d6ea..159422e38878 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/GlobalISel/RegisterBankInfo.cpp --------------*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -81,7 +80,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { } const RegisterBank * -RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, +RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { if (TargetRegisterInfo::isPhysicalRegister(Reg)) return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI)); @@ -96,7 +95,7 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, } const TargetRegisterClass & -RegisterBankInfo::getMinimalPhysRegClass(unsigned Reg, +RegisterBankInfo::getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const { assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Reg must be a physreg"); @@ -126,7 +125,7 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( } const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister( - unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) { + Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) { // If the register already has a class, fallback to MRI::constrainRegClass. auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); @@ -181,7 +180,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { const MachineOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; // The register bank of Reg is just a side effect of the current @@ -208,19 +207,49 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { continue; } } - const ValueMapping *ValMapping = - &getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank); + + unsigned Size = getSizeInBits(Reg, MRI, TRI); + const ValueMapping *ValMapping = &getValueMapping(0, Size, *CurRegBank); if (IsCopyLike) { - OperandsMapping[0] = ValMapping; + if (!OperandsMapping[0]) { + if (MI.isRegSequence()) { + // For reg_sequence, the result size does not match the input. + unsigned ResultSize = getSizeInBits(MI.getOperand(0).getReg(), + MRI, TRI); + OperandsMapping[0] = &getValueMapping(0, ResultSize, *CurRegBank); + } else { + OperandsMapping[0] = ValMapping; + } + } + + // The default handling assumes any register bank can be copied to any + // other. If this isn't the case, the target should specially deal with + // reg_sequence/phi. There may also be unsatisfiable copies. + for (; OpIdx != EndIdx; ++OpIdx) { + const MachineOperand &MO = MI.getOperand(OpIdx); + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (!Reg) + continue; + + const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI); + if (AltRegBank && + cannotCopy(*CurRegBank, *AltRegBank, getSizeInBits(Reg, MRI, TRI))) + return getInvalidInstructionMapping(); + } + CompleteMapping = true; break; } + OperandsMapping[OpIdx] = ValMapping; } - if (IsCopyLike && !CompleteMapping) + if (IsCopyLike && !CompleteMapping) { // No way to deduce the type from what we have. 
return getInvalidInstructionMapping(); + } assert(CompleteMapping && "Setting an uncomplete mapping"); return getInstructionMapping( @@ -363,11 +392,8 @@ RegisterBankInfo::getInstructionMappingImpl( ++NumInstructionMappingsCreated; auto &InstrMapping = MapOfInstructionMappings[Hash]; - if (IsInvalid) - InstrMapping = llvm::make_unique<InstructionMapping>(); - else - InstrMapping = llvm::make_unique<InstructionMapping>( - ID, Cost, OperandsMapping, NumOperands); + InstrMapping = llvm::make_unique<InstructionMapping>( + ID, Cost, OperandsMapping, NumOperands); return *InstrMapping; } @@ -382,8 +408,12 @@ RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { RegisterBankInfo::InstructionMappings RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const { InstructionMappings PossibleMappings; - // Put the default mapping first. - PossibleMappings.push_back(&getInstrMapping(MI)); + const auto &Mapping = getInstrMapping(MI); + if (Mapping.isValid()) { + // Put the default mapping first. + PossibleMappings.push_back(&Mapping); + } + // Then the alternative mapping, if any. InstructionMappings AltMappings = getInstrAlternativeMappings(MI); for (const InstructionMapping *AltMapping : AltMappings) @@ -424,14 +454,14 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns == 1 && "This mapping is too complex for this function"); - iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs = + iterator_range<SmallVectorImpl<Register>::const_iterator> NewRegs = OpdMapper.getVRegs(OpIdx); if (empty(NewRegs)) { LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); continue; } - unsigned OrigReg = MO.getReg(); - unsigned NewReg = *NewRegs.begin(); + Register OrigReg = MO.getReg(); + Register NewReg = *NewRegs.begin(); LLVM_DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr)); MO.setReg(NewReg); LLVM_DEBUG(dbgs() << " with " << printReg(NewReg, nullptr)); @@ -456,7 +486,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { } } -unsigned RegisterBankInfo::getSizeInBits(unsigned Reg, +unsigned RegisterBankInfo::getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -498,6 +528,19 @@ void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const { OS << "nullptr"; } +bool RegisterBankInfo::ValueMapping::partsAllUniform() const { + if (NumBreakDowns < 2) + return true; + + const PartialMapping *First = begin(); + for (const PartialMapping *Part = First + 1; Part != end(); ++Part) { + if (Part->Length != First->Length || Part->RegBank != First->RegBank) + return false; + } + + return true; +} + bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const { assert(NumBreakDowns && "Value mapped nowhere?!"); unsigned OrigValueBitWidth = 0; @@ -565,7 +608,7 @@ bool RegisterBankInfo::InstructionMapping::verify( "We should not care about non-reg mapping"); continue; } - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; assert(getOperandMapping(Idx).isValid() && @@ -610,7 +653,7 @@ RegisterBankInfo::OperandsMapper::OperandsMapper( assert(InstrMapping.verify(MI) && "Invalid mapping for MI"); } -iterator_range<SmallVectorImpl<unsigned>::iterator> +iterator_range<SmallVectorImpl<Register>::iterator> RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) { assert(OpIdx < 
getInstrMapping().getNumOperands() && "Out-of-bound access"); unsigned NumPartialVal = @@ -626,18 +669,18 @@ RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) { for (unsigned i = 0; i < NumPartialVal; ++i) NewVRegs.push_back(0); } - SmallVectorImpl<unsigned>::iterator End = + SmallVectorImpl<Register>::iterator End = getNewVRegsEnd(StartIdx, NumPartialVal); return make_range(&NewVRegs[StartIdx], End); } -SmallVectorImpl<unsigned>::const_iterator +SmallVectorImpl<Register>::const_iterator RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx, unsigned NumVal) const { return const_cast<OperandsMapper *>(this)->getNewVRegsEnd(StartIdx, NumVal); } -SmallVectorImpl<unsigned>::iterator +SmallVectorImpl<Register>::iterator RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx, unsigned NumVal) { assert((NewVRegs.size() == StartIdx + NumVal || @@ -649,11 +692,11 @@ RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx, void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) { assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); - iterator_range<SmallVectorImpl<unsigned>::iterator> NewVRegsForOpIdx = + iterator_range<SmallVectorImpl<Register>::iterator> NewVRegsForOpIdx = getVRegsMem(OpIdx); const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx); const PartialMapping *PartMap = ValMapping.begin(); - for (unsigned &NewVReg : NewVRegsForOpIdx) { + for (Register &NewVReg : NewVRegsForOpIdx) { assert(PartMap != ValMapping.end() && "Out-of-bound access"); assert(NewVReg == 0 && "Register has already been created"); // The new registers are always bound to scalar with the right size. @@ -669,7 +712,7 @@ void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) { void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx, unsigned PartialMapIdx, - unsigned NewVReg) { + Register NewVReg) { assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); assert(getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns > PartialMapIdx && @@ -681,7 +724,7 @@ void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx, NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] = NewVReg; } -iterator_range<SmallVectorImpl<unsigned>::const_iterator> +iterator_range<SmallVectorImpl<Register>::const_iterator> RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx, bool ForDebug) const { (void)ForDebug; @@ -693,12 +736,12 @@ RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx, unsigned PartMapSize = getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns; - SmallVectorImpl<unsigned>::const_iterator End = + SmallVectorImpl<Register>::const_iterator End = getNewVRegsEnd(StartIdx, PartMapSize); - iterator_range<SmallVectorImpl<unsigned>::const_iterator> Res = + iterator_range<SmallVectorImpl<Register>::const_iterator> Res = make_range(&NewVRegs[StartIdx], End); #ifndef NDEBUG - for (unsigned VReg : Res) + for (Register VReg : Res) assert((VReg || ForDebug) && "Some registers are uninitialized"); #endif return Res; @@ -747,7 +790,7 @@ void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS, IsFirst = false; OS << '(' << printReg(getMI().getOperand(Idx).getReg(), TRI) << ", ["; bool IsFirstNewVReg = true; - for (unsigned VReg : getVRegs(Idx)) { + for (Register VReg : getVRegs(Idx)) { if (!IsFirstNewVReg) OS << ", "; IsFirstNewVReg = false; diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index 59cbf93e7cd1..766ea1d60bac 100644 --- 
a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/GlobalISel/Utils.cpp -------------------------*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file This file implements the utility functions used by the GlobalISel @@ -30,16 +29,10 @@ using namespace llvm; unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, - const RegisterBankInfo &RBI, - MachineInstr &InsertPt, unsigned Reg, + const RegisterBankInfo &RBI, unsigned Reg, const TargetRegisterClass &RegClass) { - if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) { - unsigned NewReg = MRI.createVirtualRegister(&RegClass); - BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(), - TII.get(TargetOpcode::COPY), NewReg) - .addReg(Reg); - return NewReg; - } + if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) + return MRI.createVirtualRegister(&RegClass); return Reg; } @@ -47,6 +40,37 @@ unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI, unsigned llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, + const RegisterBankInfo &RBI, MachineInstr &InsertPt, + const TargetRegisterClass &RegClass, const MachineOperand &RegMO, + unsigned OpIdx) { + unsigned Reg = RegMO.getReg(); + // Assume physical registers are properly constrained. + assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "PhysReg not implemented"); + + unsigned ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass); + // If we created a new virtual register because the class is not compatible + // then create a copy between the new and the old register. + if (ConstrainedReg != Reg) { + MachineBasicBlock::iterator InsertIt(&InsertPt); + MachineBasicBlock &MBB = *InsertPt.getParent(); + if (RegMO.isUse()) { + BuildMI(MBB, InsertIt, InsertPt.getDebugLoc(), + TII.get(TargetOpcode::COPY), ConstrainedReg) + .addReg(Reg); + } else { + assert(RegMO.isDef() && "Must be a definition"); + BuildMI(MBB, std::next(InsertIt), InsertPt.getDebugLoc(), + TII.get(TargetOpcode::COPY), Reg) + .addReg(ConstrainedReg); + } + } + return ConstrainedReg; +} + +unsigned llvm::constrainOperandRegClass( + const MachineFunction &MF, const TargetRegisterInfo &TRI, + MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, const MachineOperand &RegMO, unsigned OpIdx) { unsigned Reg = RegMO.getReg(); @@ -82,7 +106,8 @@ unsigned llvm::constrainOperandRegClass( // and they never reach this function. 
return Reg; } - return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass); + return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *RegClass, + RegMO, OpIdx); } bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, @@ -184,18 +209,71 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg, const MachineRegisterInfo &MRI) { - MachineInstr *MI = MRI.getVRegDef(VReg); - if (MI->getOpcode() != TargetOpcode::G_CONSTANT) + Optional<ValueAndVReg> ValAndVReg = + getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false); + assert((!ValAndVReg || ValAndVReg->VReg == VReg) && + "Value found while looking through instrs"); + if (!ValAndVReg) + return None; + return ValAndVReg->Value; +} + +Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( + unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { + SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes; + MachineInstr *MI; + while ((MI = MRI.getVRegDef(VReg)) && + MI->getOpcode() != TargetOpcode::G_CONSTANT && LookThroughInstrs) { + switch (MI->getOpcode()) { + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ZEXT: + SeenOpcodes.push_back(std::make_pair( + MI->getOpcode(), + MRI.getType(MI->getOperand(0).getReg()).getSizeInBits())); + VReg = MI->getOperand(1).getReg(); + break; + case TargetOpcode::COPY: + VReg = MI->getOperand(1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(VReg)) + return None; + break; + case TargetOpcode::G_INTTOPTR: + VReg = MI->getOperand(1).getReg(); + break; + default: + return None; + } + } + if (!MI || MI->getOpcode() != TargetOpcode::G_CONSTANT || + (!MI->getOperand(1).isImm() && !MI->getOperand(1).isCImm())) return None; - if (MI->getOperand(1).isImm()) - return MI->getOperand(1).getImm(); + const MachineOperand &CstVal = MI->getOperand(1); + unsigned BitWidth = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits(); + APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm()) + : CstVal.getCImm()->getValue(); + assert(Val.getBitWidth() == BitWidth && + "Value bitwidth doesn't match definition type"); + while (!SeenOpcodes.empty()) { + std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val(); + switch (OpcodeAndSize.first) { + case TargetOpcode::G_TRUNC: + Val = Val.trunc(OpcodeAndSize.second); + break; + case TargetOpcode::G_SEXT: + Val = Val.sext(OpcodeAndSize.second); + break; + case TargetOpcode::G_ZEXT: + Val = Val.zext(OpcodeAndSize.second); + break; + } + } - if (MI->getOperand(1).isCImm() && - MI->getOperand(1).getCImm()->getBitWidth() <= 64) - return MI->getOperand(1).getCImm()->getSExtValue(); + if (Val.getBitWidth() > 64) + return None; - return None; + return ValueAndVReg{Val.getSExtValue(), VReg}; } const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg, @@ -206,8 +284,8 @@ const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg, return MI->getOperand(1).getFPImm(); } -llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg, - const MachineRegisterInfo &MRI) { +llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg, + const MachineRegisterInfo &MRI) { auto *DefMI = MRI.getVRegDef(Reg); auto DstTy = MRI.getType(DefMI->getOperand(0).getReg()); if (!DstTy.isValid()) @@ -219,7 +297,13 @@ llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg, break; DefMI = MRI.getVRegDef(SrcReg); } - return DefMI->getOpcode() == Opcode ? 
DefMI : nullptr; + return DefMI; +} + +llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg, + const MachineRegisterInfo &MRI) { + MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI); + return DefMI && DefMI->getOpcode() == Opcode ? DefMI : nullptr; } APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) { @@ -286,6 +370,31 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const unsigned Op1, return None; } +bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, + bool SNaN) { + const MachineInstr *DefMI = MRI.getVRegDef(Val); + if (!DefMI) + return false; + + if (DefMI->getFlag(MachineInstr::FmNoNans)) + return true; + + if (SNaN) { + // FP operations quiet signaling NaNs. For now, just handle the ones + // inserted during legalization. + switch (DefMI->getOpcode()) { + case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPTRUNC: + case TargetOpcode::G_FCANONICALIZE: + return true; + default: + return false; + } + } + + return false; +} + void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<StackProtector>(); } diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index d3364952f244..09201c2e7bae 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -1,9 +1,8 @@ //===- GlobalMerge.cpp - Internal globals merging -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -220,11 +219,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const { auto &DL = M.getDataLayout(); // FIXME: Find better heuristics - std::stable_sort(Globals.begin(), Globals.end(), - [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { - return DL.getTypeAllocSize(GV1->getValueType()) < - DL.getTypeAllocSize(GV2->getValueType()); - }); + llvm::stable_sort( + Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { + return DL.getTypeAllocSize(GV1->getValueType()) < + DL.getTypeAllocSize(GV2->getValueType()); + }); // If we want to just blindly group all globals together, do so. if (!GlobalMergeGroupByUse) { @@ -331,7 +330,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Function *ParentFn = I->getParent()->getParent(); // If we're only optimizing for size, ignore non-minsize functions. - if (OnlyOptimizeForSize && !ParentFn->optForMinSize()) + if (OnlyOptimizeForSize && !ParentFn->hasMinSize()) continue; size_t UGSIdx = GlobalUsesByFunction[ParentFn]; @@ -386,11 +385,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, // // Multiply that by the size of the set to give us a crude profitability // metric.
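// [Editor's note: illustrative aside, not part of the patch.] The
// comparator below orders the use sets by that crude score; equivalently,
// as a hedged sketch:
auto Score = [](const UsedGlobalSet &UGS) {
  return UGS.Globals.count() * UGS.UsageCount;
};
llvm::stable_sort(UsedGlobalSets,
                  [&](const UsedGlobalSet &A, const UsedGlobalSet &B) {
                    return Score(A) < Score(B);
                  });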
- std::stable_sort(UsedGlobalSets.begin(), UsedGlobalSets.end(), - [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) { - return UGS1.Globals.count() * UGS1.UsageCount < - UGS2.Globals.count() * UGS2.UsageCount; - }); + llvm::stable_sort(UsedGlobalSets, + [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) { + return UGS1.Globals.count() * UGS1.UsageCount < + UGS2.Globals.count() * UGS2.UsageCount; + }); // We can choose to merge all globals together, but ignore globals never used // with another global. This catches the obviously non-profitable cases of diff --git a/lib/CodeGen/HardwareLoops.cpp b/lib/CodeGen/HardwareLoops.cpp new file mode 100644 index 000000000000..5f57cabbe865 --- /dev/null +++ b/lib/CodeGen/HardwareLoops.cpp @@ -0,0 +1,463 @@ +//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// Insert hardware loop intrinsics into loops which are deemed profitable by +/// the target, by querying TargetTransformInfo. A hardware loop comprises +/// two intrinsics: one, outside the loop, to set the loop iteration count and +/// another, in the exit block, to decrement the counter. The decremented value +/// can either be carried through the loop via a phi or handled in some opaque +/// way by the target. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +#define DEBUG_TYPE "hardware-loops" + +#define HW_LOOPS_NAME "Hardware Loop Insertion" + +using namespace llvm; + +static cl::opt<bool> +ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), + cl::desc("Force hardware loop intrinsics to be inserted")); + +static cl::opt<bool> +ForceHardwareLoopPHI( + "force-hardware-loop-phi", cl::Hidden, cl::init(false), + cl::desc("Force hardware loop counter to be updated through a phi")); + +static cl::opt<bool> +ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), + cl::desc("Force allowance of nested hardware loops")); + +static cl::opt<unsigned> +LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), + cl::desc("Set the loop decrement value")); + +static cl::opt<unsigned> +CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), + cl::desc("Set the loop counter bitwidth")); + +static cl::opt<bool> +ForceGuardLoopEntry( 
"force-hardware-loop-guard", cl::Hidden, cl::init(false), + cl::desc("Force generation of loop guard intrinsic")); + +STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); + +namespace { + + using TTI = TargetTransformInfo; + + class HardwareLoops : public FunctionPass { + public: + static char ID; + + HardwareLoops() : FunctionPass(ID) { + initializeHardwareLoopsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + } + + // Try to convert the given Loop into a hardware loop. + bool TryConvertLoop(Loop *L); + + // Given that the target believes the loop to be profitable, try to + // convert it. + bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo); + + private: + ScalarEvolution *SE = nullptr; + LoopInfo *LI = nullptr; + const DataLayout *DL = nullptr; + const TargetTransformInfo *TTI = nullptr; + DominatorTree *DT = nullptr; + bool PreserveLCSSA = false; + AssumptionCache *AC = nullptr; + TargetLibraryInfo *LibInfo = nullptr; + Module *M = nullptr; + bool MadeChange = false; + }; + + class HardwareLoop { + // Expand the trip count scev into a value that we can use. + Value *InitLoopCount(); + + // Insert the set_loop_iteration intrinsic. + void InsertIterationSetup(Value *LoopCountInit); + + // Insert the loop_decrement intrinsic. + void InsertLoopDec(); + + // Insert the loop_decrement_reg intrinsic. + Instruction *InsertLoopRegDec(Value *EltsRem); + + // If the target requires the counter value to be updated in the loop, + // insert a phi to hold the value. The intended purpose is for use by + // loop_decrement_reg. + PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem); + + // Create a new cmp, that checks the returned value of loop_decrement*, + // and update the exit branch to use it. + void UpdateBranch(Value *EltsRem); + + public: + HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE, + const DataLayout &DL) : + SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()), + ExitCount(Info.ExitCount), + CountType(Info.CountType), + ExitBranch(Info.ExitBranch), + LoopDecrement(Info.LoopDecrement), + UsePHICounter(Info.CounterInReg), + UseLoopGuard(Info.PerformEntryTest) { } + + void Create(); + + private: + ScalarEvolution &SE; + const DataLayout &DL; + Loop *L = nullptr; + Module *M = nullptr; + const SCEV *ExitCount = nullptr; + Type *CountType = nullptr; + BranchInst *ExitBranch = nullptr; + Value *LoopDecrement = nullptr; + bool UsePHICounter = false; + bool UseLoopGuard = false; + BasicBlock *BeginBB = nullptr; + }; +} + +char HardwareLoops::ID = 0; + +bool HardwareLoops::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n"); + + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + DL = &F.getParent()->getDataLayout(); + auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); + LibInfo = TLIP ? 
&TLIP->getTLI() : nullptr; + PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + M = F.getParent(); + + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { + Loop *L = *I; + if (!L->getParentLoop()) + TryConvertLoop(L); + } + + return MadeChange; +} + +// Return true if the search should stop, which will be when an inner loop is +// converted and the parent loop doesn't support containing a hardware loop. +bool HardwareLoops::TryConvertLoop(Loop *L) { + // Process nested loops first. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + if (TryConvertLoop(*I)) + return true; // Stop search. + + HardwareLoopInfo HWLoopInfo(L); + if (!HWLoopInfo.canAnalyze(*LI)) + return false; + + if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) || + ForceHardwareLoops) { + + // Allow overriding of the counter width and loop decrement value. + if (CounterBitWidth.getNumOccurrences()) + HWLoopInfo.CountType = + IntegerType::get(M->getContext(), CounterBitWidth); + + if (LoopDecrement.getNumOccurrences()) + HWLoopInfo.LoopDecrement = + ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); + + MadeChange |= TryConvertLoop(HWLoopInfo); + return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); + } + + return false; +} + +bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { + + Loop *L = HWLoopInfo.L; + LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L); + + if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop, + ForceHardwareLoopPHI)) + return false; + + assert( + (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && + "Hardware Loop must have set exit info."); + + BasicBlock *Preheader = L->getLoopPreheader(); + + // If we don't have a preheader, then insert one. + if (!Preheader) + Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA); + if (!Preheader) + return false; + + HardwareLoop HWLoop(HWLoopInfo, *SE, *DL); + HWLoop.Create(); + ++NumHWLoops; + return true; +} + +void HardwareLoop::Create() { + LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n"); + + Value *LoopCountInit = InitLoopCount(); + if (!LoopCountInit) + return; + + InsertIterationSetup(LoopCountInit); + + if (UsePHICounter || ForceHardwareLoopPHI) { + Instruction *LoopDec = InsertLoopRegDec(LoopCountInit); + Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec); + LoopDec->setOperand(0, EltsRem); + UpdateBranch(LoopDec); + } else + InsertLoopDec(); + + // Run through the basic blocks of the loop and see if any of them have dead + // PHIs that can be removed. + for (auto I : L->blocks()) + DeleteDeadPHIs(I); +} + +static bool CanGenerateTest(Loop *L, Value *Count) { + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader->getSinglePredecessor()) + return false; + + BasicBlock *Pred = Preheader->getSinglePredecessor(); + if (!isa<BranchInst>(Pred->getTerminator())) + return false; + + auto *BI = cast<BranchInst>(Pred->getTerminator()); + if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition())) + return false; + + // Check that the icmp is checking for equality of Count and zero and that + // a non-zero value results in entering the loop. 
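Spelled out, the code below accepts an icmp eq or ne where one operand is the literal zero and the other is Count, and then requires that the successor taken for a non-zero count is the preheader. A compact standalone model of that successor choice (illustrative, not LLVM code):

// For 'br i1 (icmp pred %count, 0), succ0, succ1': under ICMP_NE a non-zero
// count takes successor 0, under ICMP_EQ successor 1; that successor must be
// the loop preheader for the test.set form to be usable.
enum class Pred { EQ, NE };
constexpr unsigned successorOnNonZero(Pred P) { return P == Pred::NE ? 0 : 1; }
static_assert(successorOnNonZero(Pred::NE) == 0, "non-zero enters the loop");
static_assert(successorOnNonZero(Pred::EQ) == 1, "zero exits via successor 0");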
+ auto ICmp = cast<ICmpInst>(BI->getCondition()); + LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n"); + if (!ICmp->isEquality()) + return false; + + auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) { + if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx))) + return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count; + return false; + }; + + if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1)) + return false; + + unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1; + if (BI->getSuccessor(SuccIdx) != Preheader) + return false; + + return true; +} + +Value *HardwareLoop::InitLoopCount() { + LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n"); + // Can we replace a conditional branch with an intrinsic that sets the + // loop counter and tests that it is not zero? + + SCEVExpander SCEVE(SE, DL, "loopcnt"); + if (!ExitCount->getType()->isPointerTy() && + ExitCount->getType() != CountType) + ExitCount = SE.getZeroExtendExpr(ExitCount, CountType); + + ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType)); + + // If we're trying to use the 'test and set' form of the intrinsic, we need + // to replace a conditional branch that is controlling entry to the loop. It + // is likely (guaranteed?) that the preheader has an unconditional branch to + // the loop header, so also check if it has a single predecessor. + if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount, + SE.getZero(ExitCount->getType()))) { + LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n"); + UseLoopGuard |= ForceGuardLoopEntry; + } else + UseLoopGuard = false; + + BasicBlock *BB = L->getLoopPreheader(); + if (UseLoopGuard && BB->getSinglePredecessor() && + cast<BranchInst>(BB->getTerminator())->isUnconditional()) + BB = BB->getSinglePredecessor(); + + if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) { + LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount " + << *ExitCount << "\n"); + return nullptr; + } + + Value *Count = SCEVE.expandCodeFor(ExitCount, CountType, + BB->getTerminator()); + + // FIXME: We've expanded Count where we hope to insert the counter setting + // intrinsic. But, in the case of the 'test and set' form, we may fall back + // to the plain 'set' form, in which case the insertion block is most likely + // different. This means there may be instruction(s) in a block that aren't + // needed. The isLoopEntryGuardedByCond check is trying to avoid this issue, + // but it doesn't appear to work in all cases. + + UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count); + BeginBB = UseLoopGuard ? BB : L->getLoopPreheader(); + LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n" + << " - Expanded Count in " << BB->getName() << "\n" + << " - Will insert set counter intrinsic into: " + << BeginBB->getName() << "\n"); + return Count; +} + +void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) { + IRBuilder<> Builder(BeginBB->getTerminator()); + Type *Ty = LoopCountInit->getType(); + Intrinsic::ID ID = UseLoopGuard ? + Intrinsic::test_set_loop_iterations : Intrinsic::set_loop_iterations; + Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty); + Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit); + + // Use the return value of the intrinsic to control the entry of the loop. 
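What that return value means differs between the two forms chosen above: set_loop_iterations only primes the hardware counter, while test_set_loop_iterations additionally reports whether the count was non-zero. A behavioral model of the latter (a sketch, not the LLVM API):

#include <cstdint>
#include <utility>

// Returns the primed counter plus an enter-the-loop flag; the flag is what
// replaces the guard branch's condition in the code that follows.
std::pair<uint32_t, bool> testSetLoopIterationsModel(uint32_t Count) {
  return {Count, Count != 0};
}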
+ if (UseLoopGuard) { + assert((isa<BranchInst>(BeginBB->getTerminator()) && + cast<BranchInst>(BeginBB->getTerminator())->isConditional()) && + "Expected conditional branch"); + auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator()); + LoopGuard->setCondition(SetCount); + if (LoopGuard->getSuccessor(0) != L->getLoopPreheader()) + LoopGuard->swapSuccessors(); + } + LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " + << *SetCount << "\n"); +} + +void HardwareLoop::InsertLoopDec() { + IRBuilder<> CondBuilder(ExitBranch); + + Function *DecFunc = + Intrinsic::getDeclaration(M, Intrinsic::loop_decrement, + LoopDecrement->getType()); + Value *Ops[] = { LoopDecrement }; + Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops); + Value *OldCond = ExitBranch->getCondition(); + ExitBranch->setCondition(NewCond); + + // The false branch must exit the loop. + if (!L->contains(ExitBranch->getSuccessor(0))) + ExitBranch->swapSuccessors(); + + // The old condition may be dead now, and may have even created a dead PHI + // (the original induction variable). + RecursivelyDeleteTriviallyDeadInstructions(OldCond); + + LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n"); +} + +Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) { + IRBuilder<> CondBuilder(ExitBranch); + + Function *DecFunc = + Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg, + { EltsRem->getType(), EltsRem->getType(), + LoopDecrement->getType() + }); + Value *Ops[] = { EltsRem, LoopDecrement }; + Value *Call = CondBuilder.CreateCall(DecFunc, Ops); + + LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n"); + return cast<Instruction>(Call); +} + +PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) { + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = ExitBranch->getParent(); + IRBuilder<> Builder(Header->getFirstNonPHI()); + PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2); + Index->addIncoming(NumElts, Preheader); + Index->addIncoming(EltsRem, Latch); + LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n"); + return Index; +} + +void HardwareLoop::UpdateBranch(Value *EltsRem) { + IRBuilder<> CondBuilder(ExitBranch); + Value *NewCond = + CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0)); + Value *OldCond = ExitBranch->getCondition(); + ExitBranch->setCondition(NewCond); + + // The false branch must exit the loop. + if (!L->contains(ExitBranch->getSuccessor(0))) + ExitBranch->swapSuccessors(); + + // The old condition may be dead now, and may have even created a dead PHI + // (the original induction variable). 
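Zooming out from the cleanup: UpdateBranch is the last of the helpers that together assemble a counted loop, a phi seeded with the element count, a decrement in the latch, and a compare-with-zero feeding the exit branch. A scalar model of that dataflow (standalone; values invented, and the step is assumed to divide the count):

#include <cstdint>

uint32_t countedLoopModel(uint32_t NumElts, uint32_t Step) {
  uint32_t Iterations = 0;
  // Rem plays the phi: NumElts arrives from the preheader, the decremented
  // value from the latch; the loop runs while the decrement result is
  // non-zero, exactly the icmp ne emitted by UpdateBranch.
  for (uint32_t Rem = NumElts; Rem != 0; Rem -= Step)
    ++Iterations;
  return Iterations; // NumElts / Step when Step divides NumElts
}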
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond); +} + +INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false) + +FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index ceeba639ee09..b17a253fe23f 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -1,9 +1,8 @@ //===- IfConversion.cpp - Machine code if conversion pass -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -1317,7 +1316,7 @@ void IfConverter::AnalyzeBlocks( AnalyzeBlock(MBB, Tokens); // Sort to favor more complex ifcvt scheme. - std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); + llvm::stable_sort(Tokens, IfcvtTokenCmp); } /// Returns true either if ToMBB is the next block after MBB or that all the diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index f411ee6745d0..1e82ea659617 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -1,9 +1,8 @@ //===- ImplicitNullChecks.cpp - Fold null checks into memory accesses -----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -181,7 +180,8 @@ class ImplicitNullChecks : public MachineFunctionPass { /// Returns AR_NoAlias if \p MI memory operation does not alias with /// \p PrevMI, AR_MayAlias if they may alias and AR_WillAliasEverything if /// they may alias and any further memory operation may alias with \p PrevMI. - AliasResult areMemoryOpsAliased(MachineInstr &MI, MachineInstr *PrevMI); + AliasResult areMemoryOpsAliased(const MachineInstr &MI, + const MachineInstr *PrevMI) const; enum SuitabilityResult { SR_Suitable, @@ -195,7 +195,8 @@ class ImplicitNullChecks : public MachineFunctionPass { /// no sense to continue lookup due to any other instruction will not be able /// to be used. \p PrevInsts is the set of instruction seen since /// the explicit null check on \p PointerReg. 
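The notion of suitability deserves unpacking before the declaration that follows: the memory access must be guaranteed to fault whenever PointerReg is null, which in practice bounds how far from the pointer the access may land. A rough standalone model of that intuition (kAssumedPageSize is an invented constant, not the pass's actual parameter):

#include <cstdint>

constexpr int64_t kAssumedPageSize = 4096; // illustrative only

// A null-based access at Offset faults iff it stays inside the unmapped
// page at address zero.
constexpr bool faultsOnNullBase(int64_t Offset) {
  return Offset >= 0 && Offset < kAssumedPageSize;
}
static_assert(faultsOnNullBase(8), "small positive offsets fault");
static_assert(!faultsOnNullBase(1 << 20), "far offsets may be mapped");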
- SuitabilityResult isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg, + SuitabilityResult isSuitableMemoryOp(const MachineInstr &MI, + unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts); /// Return true if \p FaultingMI can be hoisted from after the @@ -228,7 +229,8 @@ public: } // end anonymous namespace bool ImplicitNullChecks::canHandle(const MachineInstr *MI) { - if (MI->isCall() || MI->hasUnmodeledSideEffects()) + if (MI->isCall() || MI->mayRaiseFPException() || + MI->hasUnmodeledSideEffects()) return false; auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); }; (void)IsRegMask; @@ -319,8 +321,8 @@ static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI, } ImplicitNullChecks::AliasResult -ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI, - MachineInstr *PrevMI) { +ImplicitNullChecks::areMemoryOpsAliased(const MachineInstr &MI, + const MachineInstr *PrevMI) const { // If it is not memory access, skip the check. if (!(PrevMI->mayStore() || PrevMI->mayLoad())) return AR_NoAlias; @@ -357,10 +359,11 @@ ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI, } ImplicitNullChecks::SuitabilityResult -ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg, +ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI, + unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) { int64_t Offset; - MachineOperand *BaseOp; + const MachineOperand *BaseOp; if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) || !BaseOp->isReg() || BaseOp->getReg() != PointerReg) diff --git a/lib/CodeGen/IndirectBrExpandPass.cpp b/lib/CodeGen/IndirectBrExpandPass.cpp index 7b05ebf820fd..7ac093ba4a71 100644 --- a/lib/CodeGen/IndirectBrExpandPass.cpp +++ b/lib/CodeGen/IndirectBrExpandPass.cpp @@ -1,9 +1,8 @@ //===- IndirectBrExpandPass.cpp - Expand indirectbr to switch -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -149,11 +148,9 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) { ConstantInt *BBIndexC = ConstantInt::get(ITy, BBIndex); // Now rewrite the blockaddress to an integer constant based on the index. - // FIXME: We could potentially preserve the uses as arguments to inline asm. - // This would allow some uses such as diagnostic information in crashes to - // have higher quality even when this transform is enabled, but would break - // users that round-trip blockaddresses through inline assembly and then - // back into an indirectbr. + // FIXME: This part doesn't properly recognize other uses of blockaddress + // expressions, for instance, where they are used to pass labels to + // asm-goto. This part of the pass needs a rework. 
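The rewrite itself, performed by the replaceAllUsesWith call that follows, turns pointer dispatch into index dispatch: every blockaddress collapses to a small integer and the indirectbr becomes a switch over those integers. A contrived model of the resulting control flow (plain C++, not LLVM code):

#include <cstdint>

// Before: dispatch jumps through an arbitrary code pointer (indirectbr).
// After: targets are numbered and dispatch switches on the number, which is
// what ConstantExpr::getIntToPtr(BBIndexC, ...) stands in for below.
int dispatchModel(uintptr_t EncodedTarget) {
  switch (EncodedTarget) {
  case 1: return 10;  // was: block A
  case 2: return 20;  // was: block B
  default: return -1; // unreachable for well-formed inputs
  }
}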
BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(BBIndexC, BA->getType())); } diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 007e9283d833..41ae8061a917 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -1,9 +1,8 @@ //===- InlineSpiller.cpp - Insert spills and restores inline --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -76,6 +75,10 @@ STATISTIC(NumRemats, "Number of rematerialized defs for spilling"); static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden, cl::desc("Disable inline spill hoisting")); +static cl::opt<bool> +RestrictStatepointRemat("restrict-statepoint-remat", + cl::init(false), cl::Hidden, + cl::desc("Restrict remat for statepoint operands")); namespace { @@ -215,6 +218,7 @@ private: void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI); void markValueUsed(LiveInterval*, VNInfo*); + bool canGuaranteeAssignmentAfterRemat(unsigned VReg, MachineInstr &MI); bool reMaterializeFor(LiveInterval &, MachineInstr &MI); void reMaterializeAll(); @@ -514,6 +518,28 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { } while (!WorkList.empty()); } +bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg, + MachineInstr &MI) { + if (!RestrictStatepointRemat) + return true; + // Here's a quick explanation of the problem we're trying to handle: + // * There are some pseudo instructions with more vreg uses than there are + // physical registers on the machine. + // * This is normally handled by spilling the vreg, and folding the reload + // into the user instruction. (Thus decreasing the number of used vregs + // until the remainder can be assigned to physregs.) + // * However, since we may try to spill vregs in any order, we can end up + // trying to spill each operand to the instruction, and then rematting it + // instead. When that happens, the new live intervals (for the remats) are + // expected to be trivially assignable (i.e. RS_Done). However, since we + // may have more remats than physregs, we're guaranteed to fail to assign + // one. + // At the moment, we only handle this for STATEPOINTs since they're the only + // pseudo op where we've seen this. If we start seeing other instructions + // with the same problem, we need to revisit this. + return (MI.getOpcode() != TargetOpcode::STATEPOINT); +} + /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // Analyze instruction @@ -569,6 +595,14 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { return true; } + // If we can't guarantee that we'll be able to actually assign the new vreg, + // we can't remat. + if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg, MI)) { + markValueUsed(&VirtReg, ParentVNI); + LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI); + return false; + } + // Allocate a new register for the remat. 
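Before the fresh vreg is created, the counting argument above in concrete form (a toy model; the numbers are invented, real register budgets come from the target):

// If a STATEPOINT has Uses register operands that all get rematerialized, at
// most PhysRegs of them can be live in registers at once; the remainder must
// stay folded as stack reloads, so unrestricted remat is guaranteed to leave
// some new intervals unassignable.
constexpr unsigned mustStayFolded(unsigned Uses, unsigned PhysRegs) {
  return Uses > PhysRegs ? Uses - PhysRegs : 0;
}
static_assert(mustStayFolded(40, 16) == 24, "24 operands cannot get a physreg");
static_assert(mustStayFolded(8, 16) == 0, "small statepoints are unaffected");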
unsigned NewVReg = Edit->createFrom(Original); @@ -799,11 +833,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, if (FoldOps.empty()) return false; - MachineInstrSpan MIS(MI); + MachineInstrSpan MIS(MI, MI->getParent()); MachineInstr *FoldMI = LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS) - : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS); + : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM); if (!FoldMI) return false; @@ -834,6 +868,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, HSpiller.rmFromMergeableSpills(*MI, FI)) --NumSpills; LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI); + if (MI->isCall()) + MI->getMF()->updateCallSiteInfo(MI, FoldMI); MI->eraseFromParent(); // Insert any new instructions other than FoldMI into the LIS maps. @@ -871,7 +907,7 @@ void InlineSpiller::insertReload(unsigned NewVReg, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - MachineInstrSpan MIS(MI); + MachineInstrSpan MIS(MI, &MBB); TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot, MRI.getRegClass(NewVReg), &TRI); @@ -901,7 +937,7 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - MachineInstrSpan MIS(MI); + MachineInstrSpan MIS(MI, &MBB); bool IsRealSpill = true; if (isFullUndefDef(*MI)) { // Don't spill undef value. diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 82f6e8d8e234..7b50dac4cd1a 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -1,9 +1,8 @@ //===- InterferenceCache.cpp - Caching per-block interference -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 160e2b16e294..50c6ac62d194 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -1,9 +1,8 @@ //===- InterferenceCache.h - Caching per-block interference ----*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp index fd2ff162630a..14bc560a561c 100644 --- a/lib/CodeGen/InterleavedAccessPass.cpp +++ b/lib/CodeGen/InterleavedAccessPass.cpp @@ -1,9 +1,8 @@ //===- InterleavedAccessPass.cpp ------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -164,14 +163,19 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor, /// <0, 2, 4, 6> (mask of index 0 to extract even elements) /// <1, 3, 5, 7> (mask of index 1 to extract odd elements) static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, - unsigned &Index, unsigned MaxFactor) { + unsigned &Index, unsigned MaxFactor, + unsigned NumLoadElements) { if (Mask.size() < 2) return false; // Check potential Factors. - for (Factor = 2; Factor <= MaxFactor; Factor++) + for (Factor = 2; Factor <= MaxFactor; Factor++) { + // Make sure we don't produce a load wider than the input load. + if (Mask.size() * Factor > NumLoadElements) + return false; if (isDeInterleaveMaskOfFactor(Mask, Factor, Index)) return true; + } return false; } @@ -303,9 +307,10 @@ bool InterleavedAccess::lowerInterleavedLoad( unsigned Factor, Index; + unsigned NumLoadElements = LI->getType()->getVectorNumElements(); // Check if the first shufflevector is DE-interleave shuffle. if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index, - MaxFactor)) + MaxFactor, NumLoadElements)) return false; // Holds the corresponding index for each DE-interleave shuffle. diff --git a/lib/CodeGen/InterleavedLoadCombinePass.cpp b/lib/CodeGen/InterleavedLoadCombinePass.cpp index 989fa164ad2d..9525da849e2a 100644 --- a/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -1,9 +1,8 @@ //===- InterleavedLoadCombine.cpp - Combine Interleaved Loads ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -961,6 +960,7 @@ public: if (!PtrTy) { Result = Polynomial(); BasePtr = nullptr; + return; } unsigned PointerBits = DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace()); @@ -1219,7 +1219,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, "interleaved.wide.ptrcast"); // Create the wide load and update the MemorySSA. - auto LI = Builder.CreateAlignedLoad(CI, InsertionPoint->getAlignment(), + auto LI = Builder.CreateAlignedLoad(ILTy, CI, InsertionPoint->getAlignment(), "interleaved.wide.load"); auto MSSAU = MemorySSAUpdater(&MSSA); MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore( diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 707113bd973b..8cbd8bcaeabb 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -1,9 +1,8 @@ //===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -24,39 +23,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -template <class ArgIt> -static void EnsureFunctionExists(Module &M, const char *Name, - ArgIt ArgBegin, ArgIt ArgEnd, - Type *RetTy) { - // Insert a correctly-typed definition now. - std::vector<Type *> ParamTys; - for (ArgIt I = ArgBegin; I != ArgEnd; ++I) - ParamTys.push_back(I->getType()); - M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false)); -} - -static void EnsureFPIntrinsicsExist(Module &M, Function &Fn, - const char *FName, - const char *DName, const char *LDName) { - // Insert definitions for all the floating point types. - switch((int)Fn.arg_begin()->getType()->getTypeID()) { - case Type::FloatTyID: - EnsureFunctionExists(M, FName, Fn.arg_begin(), Fn.arg_end(), - Type::getFloatTy(M.getContext())); - break; - case Type::DoubleTyID: - EnsureFunctionExists(M, DName, Fn.arg_begin(), Fn.arg_end(), - Type::getDoubleTy(M.getContext())); - break; - case Type::X86_FP80TyID: - case Type::FP128TyID: - case Type::PPC_FP128TyID: - EnsureFunctionExists(M, LDName, Fn.arg_begin(), Fn.arg_end(), - Fn.arg_begin()->getType()); - break; - } -} - /// This function is used when we want to lower an intrinsic call to a call of /// an external function. This handles hard cases such as when there was already /// a prototype for the external function, but that prototype doesn't match the @@ -72,8 +38,8 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, std::vector<Type *> ParamTys; for (ArgIt I = ArgBegin; I != ArgEnd; ++I) ParamTys.push_back((*I)->getType()); - Constant* FCache = M->getOrInsertFunction(NewFn, - FunctionType::get(RetTy, ParamTys, false)); + FunctionCallee FCache = + M->getOrInsertFunction(NewFn, FunctionType::get(RetTy, ParamTys, false)); IRBuilder<> Builder(CI->getParent(), CI->getIterator()); SmallVector<Value *, 8> Args(ArgBegin, ArgEnd); @@ -92,75 +58,6 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, # define setjmp_undefined_for_msvc #endif -void IntrinsicLowering::AddPrototypes(Module &M) { - LLVMContext &Context = M.getContext(); - for (auto &F : M) - if (F.isDeclaration() && !F.use_empty()) - switch (F.getIntrinsicID()) { - default: break; - case Intrinsic::setjmp: - EnsureFunctionExists(M, "setjmp", F.arg_begin(), F.arg_end(), - Type::getInt32Ty(M.getContext())); - break; - case Intrinsic::longjmp: - EnsureFunctionExists(M, "longjmp", F.arg_begin(), F.arg_end(), - Type::getVoidTy(M.getContext())); - break; - case Intrinsic::siglongjmp: - EnsureFunctionExists(M, "abort", F.arg_end(), F.arg_end(), - Type::getVoidTy(M.getContext())); - break; - case Intrinsic::memcpy: - M.getOrInsertFunction("memcpy", - Type::getInt8PtrTy(Context), - Type::getInt8PtrTy(Context), - Type::getInt8PtrTy(Context), - DL.getIntPtrType(Context)); - break; - case Intrinsic::memmove: - M.getOrInsertFunction("memmove", - Type::getInt8PtrTy(Context), - Type::getInt8PtrTy(Context), - Type::getInt8PtrTy(Context), - DL.getIntPtrType(Context)); - break; - case Intrinsic::memset: - M.getOrInsertFunction("memset", - Type::getInt8PtrTy(Context), - Type::getInt8PtrTy(Context), - Type::getInt32Ty(M.getContext()), - DL.getIntPtrType(Context)); - break; - case Intrinsic::sqrt: - EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl"); - break; - case Intrinsic::sin: - EnsureFPIntrinsicsExist(M, F, "sinf", "sin", "sinl"); - break; - case 
Intrinsic::cos: - EnsureFPIntrinsicsExist(M, F, "cosf", "cos", "cosl"); - break; - case Intrinsic::pow: - EnsureFPIntrinsicsExist(M, F, "powf", "pow", "powl"); - break; - case Intrinsic::log: - EnsureFPIntrinsicsExist(M, F, "logf", "log", "logl"); - break; - case Intrinsic::log2: - EnsureFPIntrinsicsExist(M, F, "log2f", "log2", "log2l"); - break; - case Intrinsic::log10: - EnsureFPIntrinsicsExist(M, F, "log10f", "log10", "log10l"); - break; - case Intrinsic::exp: - EnsureFPIntrinsicsExist(M, F, "expf", "exp", "expl"); - break; - case Intrinsic::exp2: - EnsureFPIntrinsicsExist(M, F, "exp2f", "exp2", "exp2l"); - break; - } -} - /// Emit the code to lower bswap of V before the specified instruction IP. static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!"); @@ -601,7 +498,7 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) { // Okay, we can do this xform, do so now. Module *M = CI->getModule(); - Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty); + Function *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty); Value *Op = CI->getArgOperand(0); Op = CallInst::Create(Int, Op, CI->getName(), CI); diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 52e832cc38c1..886ae7e94adb 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -1,9 +1,8 @@ //===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -202,6 +201,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return true; if (!TargetPassConfig::willCompleteCodeGenPipeline()) { + if (this->getTargetTriple().isOSAIX()) { + // On AIX, we might manifest MCSymbols during SDAG lowering. For MIR + // testing to be meaningful, we need to ensure that the symbols created + // are MCSymbolXCOFF variants, which requires that + // the TargetLoweringObjectFile instance has been initialized. + MCContext &Ctx = MMI->getContext(); + const_cast<TargetLoweringObjectFile &>(*this->getObjFileLowering()) + .Initialize(Ctx, *this); + } PM.add(createPrintMIRPass(Out)); } else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext())) return true; diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index f9f33a98a9d1..8a7a41d0f763 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -1,9 +1,8 @@ //===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp index 5b52cc66a297..200ac0ba15bf 100644 --- a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp @@ -1,9 +1,8 @@ ///===- LazyMachineBlockFrequencyInfo.cpp - Lazy Machine Block Frequency --===// /// -/// The LLVM Compiler Infrastructure -/// -/// This file is distributed under the University of Illinois Open Source -/// License. See LICENSE.TXT for details. +/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +/// See https://llvm.org/LICENSE.txt for license information. +/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception /// ///===---------------------------------------------------------------------===// /// \file diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index d06821bdfcce..503821537ed9 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -1,9 +1,8 @@ //===- LexicalScopes.cpp - Collecting lexical scope info ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp index fc0ebea2d36c..a669e64692b9 100644 --- a/lib/CodeGen/LiveDebugValues.cpp +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -1,9 +1,8 @@ //===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// @@ -21,6 +20,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" @@ -35,13 +35,15 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Config/llvm-config.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -57,6 +59,7 @@ #include <cstdint> #include <functional> #include <queue> +#include <tuple> #include <utility> #include <vector> @@ -68,12 +71,12 @@ STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); // If @MI is a DBG_VALUE with debug value described by a defined // register, returns the number of this register. In the other case, returns 0. -static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) { +static Register isDbgValueDescribedByReg(const MachineInstr &MI) { assert(MI.isDebugValue() && "expected a DBG_VALUE"); assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); // If location of variable is described using a register (directly // or indirectly), this register is always a first operand. - return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; + return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register(); } namespace { @@ -86,6 +89,8 @@ private: BitVector CalleeSavedRegs; LexicalScopes LS; + enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore }; + /// Keeps track of lexical scopes associated with a user value's source /// location. class UserValueScopes { @@ -105,51 +110,134 @@ private: } }; - /// Based on std::pair so it can be used as an index into a DenseMap. - using DebugVariableBase = - std::pair<const DILocalVariable *, const DILocation *>; - /// A potentially inlined instance of a variable. - struct DebugVariable : public DebugVariableBase { - DebugVariable(const DILocalVariable *Var, const DILocation *InlinedAt) - : DebugVariableBase(Var, InlinedAt) {} - - const DILocalVariable *getVar() const { return this->first; } - const DILocation *getInlinedAt() const { return this->second; } - - bool operator<(const DebugVariable &DV) const { - if (getVar() == DV.getVar()) - return getInlinedAt() < DV.getInlinedAt(); - return getVar() < DV.getVar(); + using FragmentInfo = DIExpression::FragmentInfo; + using OptFragmentInfo = Optional<DIExpression::FragmentInfo>; + + /// Storage for identifying a potentially inlined instance of a variable, + /// or a fragment thereof. + class DebugVariable { + const DILocalVariable *Variable; + OptFragmentInfo Fragment; + const DILocation *InlinedAt; + + /// Fragment that will overlap all other fragments. Used as default when + /// caller demands a fragment. 
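Why such a catch-all works: any real fragment's bit range is contained in one with offset zero and maximal size, so treating "no fragment" as that value makes overlap checks uniform. A minimal sketch of the containment argument (field meanings mirror DIExpression::FragmentInfo; the check itself is illustrative):

#include <cstdint>
#include <limits>

struct FragSketch {
  uint64_t SizeInBits;
  uint64_t OffsetInBits;
};

// The catch-all: maximal size at offset 0, matching how DefaultFragment is
// initialized below.
constexpr FragSketch Everything{std::numeric_limits<uint64_t>::max(), 0};

constexpr bool overlaps(FragSketch A, FragSketch B) {
  return A.OffsetInBits < B.OffsetInBits + B.SizeInBits &&
         B.OffsetInBits < A.OffsetInBits + A.SizeInBits;
}
static_assert(overlaps(Everything, FragSketch{32, 64}),
              "the default fragment overlaps any real fragment");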
+ static const FragmentInfo DefaultFragment; + + public: + DebugVariable(const DILocalVariable *Var, OptFragmentInfo &&FragmentInfo, + const DILocation *InlinedAt) + : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {} + + DebugVariable(const DILocalVariable *Var, OptFragmentInfo &FragmentInfo, + const DILocation *InlinedAt) + : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {} + + DebugVariable(const DILocalVariable *Var, const DIExpression *DIExpr, + const DILocation *InlinedAt) + : DebugVariable(Var, DIExpr->getFragmentInfo(), InlinedAt) {} + + DebugVariable(const MachineInstr &MI) + : DebugVariable(MI.getDebugVariable(), + MI.getDebugExpression()->getFragmentInfo(), + MI.getDebugLoc()->getInlinedAt()) {} + + const DILocalVariable *getVar() const { return Variable; } + const OptFragmentInfo &getFragment() const { return Fragment; } + const DILocation *getInlinedAt() const { return InlinedAt; } + + const FragmentInfo getFragmentDefault() const { + return Fragment.getValueOr(DefaultFragment); + } + + static bool isFragmentDefault(FragmentInfo &F) { + return F == DefaultFragment; + } + + bool operator==(const DebugVariable &Other) const { + return std::tie(Variable, Fragment, InlinedAt) == + std::tie(Other.Variable, Other.Fragment, Other.InlinedAt); + } + + bool operator<(const DebugVariable &Other) const { + return std::tie(Variable, Fragment, InlinedAt) < + std::tie(Other.Variable, Other.Fragment, Other.InlinedAt); } }; + friend struct llvm::DenseMapInfo<DebugVariable>; + /// A pair of debug variable and value location. struct VarLoc { + // The location at which a spilled variable resides. It consists of a + // register and an offset. + struct SpillLoc { + unsigned SpillBase; + int SpillOffset; + bool operator==(const SpillLoc &Other) const { + return SpillBase == Other.SpillBase && SpillOffset == Other.SpillOffset; + } + }; + const DebugVariable Var; const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE. mutable UserValueScopes UVS; - enum { InvalidKind = 0, RegisterKind } Kind = InvalidKind; + enum VarLocKind { + InvalidKind = 0, + RegisterKind, + SpillLocKind, + ImmediateKind, + EntryValueKind + } Kind = InvalidKind; /// The value location. Stored separately to avoid repeatedly /// extracting it from MI. union { uint64_t RegNo; + SpillLoc SpillLocation; uint64_t Hash; + int64_t Immediate; + const ConstantFP *FPImm; + const ConstantInt *CImm; } Loc; - VarLoc(const MachineInstr &MI, LexicalScopes &LS) - : Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI), - UVS(MI.getDebugLoc(), LS) { + VarLoc(const MachineInstr &MI, LexicalScopes &LS, + VarLocKind K = InvalidKind) + : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS){ static_assert((sizeof(Loc) == sizeof(uint64_t)), "hash does not cover all members of Loc"); assert(MI.isDebugValue() && "not a DBG_VALUE"); assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); if (int RegNo = isDbgValueDescribedByReg(MI)) { - Kind = RegisterKind; + Kind = MI.isDebugEntryValue() ? 
EntryValueKind : RegisterKind; Loc.RegNo = RegNo; + } else if (MI.getOperand(0).isImm()) { + Kind = ImmediateKind; + Loc.Immediate = MI.getOperand(0).getImm(); + } else if (MI.getOperand(0).isFPImm()) { + Kind = ImmediateKind; + Loc.FPImm = MI.getOperand(0).getFPImm(); + } else if (MI.getOperand(0).isCImm()) { + Kind = ImmediateKind; + Loc.CImm = MI.getOperand(0).getCImm(); } + assert((Kind != ImmediateKind || !MI.isDebugEntryValue()) && + "entry values must be register locations"); + } + + /// The constructor for spill locations. + VarLoc(const MachineInstr &MI, unsigned SpillBase, int SpillOffset, + LexicalScopes &LS) + : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS) { + assert(MI.isDebugValue() && "not a DBG_VALUE"); + assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); + Kind = SpillLocKind; + Loc.SpillLocation = {SpillBase, SpillOffset}; } + // Is the Loc field a constant or constant object? + bool isConstant() const { return Kind == ImmediateKind; } + /// If this variable is described by a register, return it, /// otherwise return 0. unsigned isDescribedByReg() const { @@ -167,17 +255,18 @@ private: #endif bool operator==(const VarLoc &Other) const { - return Var == Other.Var && Loc.Hash == Other.Loc.Hash; + return Kind == Other.Kind && Var == Other.Var && + Loc.Hash == Other.Loc.Hash; } /// This operator guarantees that VarLocs are sorted by Variable first. bool operator<(const VarLoc &Other) const { - if (Var == Other.Var) - return Loc.Hash < Other.Loc.Hash; - return Var < Other.Var; + return std::tie(Var, Kind, Loc.Hash) < + std::tie(Other.Var, Other.Kind, Other.Loc.Hash); } }; + using DebugParamMap = SmallDenseMap<const DILocalVariable *, MachineInstr *>; using VarLocMap = UniqueVector<VarLoc>; using VarLocSet = SparseBitVector<>; using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>; @@ -187,26 +276,35 @@ private: }; using TransferMap = SmallVector<TransferDebugPair, 4>; + // Types for recording sets of variable fragments that overlap. For a given + // local variable, we record all other fragments of that variable that could + // overlap it, to reduce search time. + using FragmentOfVar = + std::pair<const DILocalVariable *, DIExpression::FragmentInfo>; + using OverlapMap = + DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>; + + // Helper while building OverlapMap, a map of all fragments seen for a given + // DILocalVariable. + using VarToFragments = + DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>; + /// This holds the working set of currently open ranges. For fast /// access, this is done both as a set of VarLocIDs, and a map of /// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all /// previous open ranges for the same variable. class OpenRangesSet { VarLocSet VarLocs; - SmallDenseMap<DebugVariableBase, unsigned, 8> Vars; + SmallDenseMap<DebugVariable, unsigned, 8> Vars; + OverlapMap &OverlappingFragments; public: + OpenRangesSet(OverlapMap &_OLapMap) : OverlappingFragments(_OLapMap) {} + const VarLocSet &getVarLocs() const { return VarLocs; } /// Terminate all open ranges for Var by removing it from the set. - void erase(DebugVariable Var) { - auto It = Vars.find(Var); - if (It != Vars.end()) { - unsigned ID = It->second; - VarLocs.reset(ID); - Vars.erase(It); - } - } + void erase(DebugVariable Var); /// Terminate all open ranges listed in \c KillSet by removing /// them from the set. @@ -217,7 +315,7 @@ private: } /// Insert a new range into the set. 
- void insert(unsigned VarLocID, DebugVariableBase Var) { + void insert(unsigned VarLocID, DebugVariable Var) { VarLocs.set(VarLocID); Vars.insert({Var, VarLocID}); } @@ -237,24 +335,43 @@ private: bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF, unsigned &Reg); - int extractSpillBaseRegAndOffset(const MachineInstr &MI, unsigned &Reg); + /// If a given instruction is identified as a spill, return the spill location + /// and set \p Reg to the spilled register. + Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI, + MachineFunction *MF, + unsigned &Reg); + /// Given a spill instruction, extract the register and offset used to + /// address the spill location in a target independent way. + VarLoc::SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI); void insertTransferDebugPair(MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers, VarLocMap &VarLocIDs, - unsigned OldVarID, unsigned NewReg = 0); + unsigned OldVarID, TransferKind Kind, + unsigned NewReg = 0); void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs); - void transferSpillInst(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, TransferMap &Transfers); + void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, TransferMap &Transfers); + void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, TransferMap &Transfers, + DebugParamMap &DebugEntryVals, + SparseBitVector<> &KillSet); void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers); void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, - const VarLocMap &VarLocIDs); + VarLocMap &VarLocIDs, TransferMap &Transfers, + DebugParamMap &DebugEntryVals); bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); + bool process(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, - TransferMap &Transfers, bool transferChanges); + TransferMap &Transfers, DebugParamMap &DebugEntryVals, + bool transferChanges, OverlapMap &OverlapFragments, + VarToFragments &SeenFragments); + + void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments, + OverlapMap &OLapMap); bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, @@ -289,10 +406,46 @@ public: } // end anonymous namespace +namespace llvm { + +template <> struct DenseMapInfo<LiveDebugValues::DebugVariable> { + using DV = LiveDebugValues::DebugVariable; + using OptFragmentInfo = LiveDebugValues::OptFragmentInfo; + using FragmentInfo = LiveDebugValues::FragmentInfo; + + // Empty key: no key should be generated that has no DILocalVariable. 
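DenseMap requires every key type to supply two sentinel values that can never equal a real key, plus a hash and an equality predicate; getEmptyKey and getTombstoneKey below provide them for DebugVariable. The general shape, shown for a simpler key type (a generic illustration, not part of this patch):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include <climits>
#include <utility>

struct IntPairInfo {
  using Pair = std::pair<int, int>;
  // Sentinels: values no real key is allowed to take.
  static inline Pair getEmptyKey() { return {INT_MIN, 0}; }
  static inline Pair getTombstoneKey() { return {INT_MIN, 1}; }
  static unsigned getHashValue(const Pair &V) {
    return llvm::hash_combine(V.first, V.second);
  }
  static bool isEqual(const Pair &A, const Pair &B) { return A == B; }
};
// Usage: llvm::DenseMap<IntPairInfo::Pair, unsigned, IntPairInfo> Map;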
+ static inline DV getEmptyKey() { + return DV(nullptr, OptFragmentInfo(), nullptr); + } + + // Difference in tombstone is that the Optional is meaningful + static inline DV getTombstoneKey() { + return DV(nullptr, OptFragmentInfo({0, 0}), nullptr); + } + + static unsigned getHashValue(const DV &D) { + unsigned HV = 0; + const OptFragmentInfo &Fragment = D.getFragment(); + if (Fragment) + HV = DenseMapInfo<FragmentInfo>::getHashValue(*Fragment); + + return hash_combine(D.getVar(), HV, D.getInlinedAt()); + } + + static bool isEqual(const DV &A, const DV &B) { return A == B; } +}; + +} // namespace llvm + //===----------------------------------------------------------------------===// // Implementation //===----------------------------------------------------------------------===// +const DIExpression::FragmentInfo + LiveDebugValues::DebugVariable::DefaultFragment = { + std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::min()}; + char LiveDebugValues::ID = 0; char &llvm::LiveDebugValuesID = LiveDebugValues::ID; @@ -312,6 +465,39 @@ void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +/// Erase a variable from the set of open ranges, and additionally erase any +/// fragments that may overlap it. +void LiveDebugValues::OpenRangesSet::erase(DebugVariable Var) { + // Erasure helper. + auto DoErase = [this](DebugVariable VarToErase) { + auto It = Vars.find(VarToErase); + if (It != Vars.end()) { + unsigned ID = It->second; + VarLocs.reset(ID); + Vars.erase(It); + } + }; + + // Erase the variable/fragment that ends here. + DoErase(Var); + + // Extract the fragment. Interpret an empty fragment as one that covers all + // possible bits. + FragmentInfo ThisFragment = Var.getFragmentDefault(); + + // There may be fragments that overlap the designated fragment. Look them up + // in the pre-computed overlap map, and erase them too. + auto MapIt = OverlappingFragments.find({Var.getVar(), ThisFragment}); + if (MapIt != OverlappingFragments.end()) { + for (auto Fragment : MapIt->second) { + LiveDebugValues::OptFragmentInfo FragmentHolder; + if (!DebugVariable::isFragmentDefault(Fragment)) + FragmentHolder = LiveDebugValues::OptFragmentInfo(Fragment); + DoErase({Var.getVar(), FragmentHolder, Var.getInlinedAt()}); + } + } +} + //===----------------------------------------------------------------------===// // Debug Range Extension Implementation //===----------------------------------------------------------------------===// @@ -339,10 +525,8 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, } #endif -/// Given a spill instruction, extract the register and offset used to -/// address the spill location in a target independent way. 
-int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI, - unsigned &Reg) { +LiveDebugValues::VarLoc::SpillLoc +LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) { assert(MI.hasOneMemOperand() && "Spill instruction does not have exactly one memory operand?"); auto MMOI = MI.memoperands_begin(); @@ -351,7 +535,9 @@ int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI, "Inconsistent memory operand in spill instruction"); int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex(); const MachineBasicBlock *MBB = MI.getParent(); - return TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg); + unsigned Reg; + int Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg); + return {Reg, Offset}; } /// End all previous ranges related to @MI and start a new range from @MI @@ -362,21 +548,72 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, if (!MI.isDebugValue()) return; const DILocalVariable *Var = MI.getDebugVariable(); + const DIExpression *Expr = MI.getDebugExpression(); const DILocation *DebugLoc = MI.getDebugLoc(); const DILocation *InlinedAt = DebugLoc->getInlinedAt(); assert(Var->isValidLocationForIntrinsic(DebugLoc) && "Expected inlined-at fields to agree"); // End all previous ranges of Var. - DebugVariable V(Var, InlinedAt); + DebugVariable V(Var, Expr, InlinedAt); OpenRanges.erase(V); // Add the VarLoc to OpenRanges from this DBG_VALUE. - // TODO: Currently handles DBG_VALUE which has only reg as location. - if (isDbgValueDescribedByReg(MI)) { + unsigned ID; + if (isDbgValueDescribedByReg(MI) || MI.getOperand(0).isImm() || + MI.getOperand(0).isFPImm() || MI.getOperand(0).isCImm()) { + // Use normal VarLoc constructor for registers and immediates. VarLoc VL(MI, LS); - unsigned ID = VarLocIDs.insert(VL); + ID = VarLocIDs.insert(VL); + OpenRanges.insert(ID, VL.Var); + } else if (MI.hasOneMemOperand()) { + // It's a stack spill -- fetch spill base and offset. + VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI); + VarLoc VL(MI, SpillLocation.SpillBase, SpillLocation.SpillOffset, LS); + ID = VarLocIDs.insert(VL); OpenRanges.insert(ID, VL.Var); + } else { + // This must be an undefined location. We should leave OpenRanges closed. + assert(MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == 0 && + "Unexpected non-undef DBG_VALUE encountered"); + } +} + +void LiveDebugValues::emitEntryValues(MachineInstr &MI, + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, + TransferMap &Transfers, + DebugParamMap &DebugEntryVals, + SparseBitVector<> &KillSet) { + MachineFunction *MF = MI.getParent()->getParent(); + for (unsigned ID : KillSet) { + if (!VarLocIDs[ID].Var.getVar()->isParameter()) + continue; + + const MachineInstr *CurrDebugInstr = &VarLocIDs[ID].MI; + + // If parameter's DBG_VALUE is not in the map that means we can't + // generate parameter's entry value. 
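The rewrite performed after this lookup amounts to prefixing the parameter's old expression with an entry-value marker; in isolation, a sketch using the same DIExpression::prepend call that appears in the hunk below (the helper name is made up):

    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    // Wrap a parameter's expression so consumers evaluate the attached
    // register in the state the function was entered with (DWARF
    // entry-value semantics).
    static DIExpression *makeEntryValueExpr(const DIExpression *ParamExpr) {
      return DIExpression::prepend(ParamExpr, DIExpression::EntryValue);
    }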
+ if (!DebugEntryVals.count(CurrDebugInstr->getDebugVariable())) + continue; + + auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()]; + DIExpression *NewExpr = DIExpression::prepend( + ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue); + MachineInstr *EntryValDbgMI = + BuildMI(*MF, ParamDebugInstr->getDebugLoc(), ParamDebugInstr->getDesc(), + ParamDebugInstr->isIndirectDebugValue(), + ParamDebugInstr->getOperand(0).getReg(), + ParamDebugInstr->getDebugVariable(), NewExpr); + + if (ParamDebugInstr->isIndirectDebugValue()) + EntryValDbgMI->getOperand(1).setImm( + ParamDebugInstr->getOperand(1).getImm()); + + Transfers.push_back({&MI, EntryValDbgMI}); + VarLoc VL(*EntryValDbgMI, LS); + unsigned EntryValLocID = VarLocIDs.insert(VL); + OpenRanges.insert(EntryValLocID, VL.Var); } } @@ -387,51 +624,92 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, /// otherwise it is variable's location on the stack. void LiveDebugValues::insertTransferDebugPair( MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers, - VarLocMap &VarLocIDs, unsigned OldVarID, unsigned NewReg) { - const MachineInstr *DMI = &VarLocIDs[OldVarID].MI; + VarLocMap &VarLocIDs, unsigned OldVarID, TransferKind Kind, + unsigned NewReg) { + const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI; MachineFunction *MF = MI.getParent()->getParent(); - MachineInstr *NewDMI; - if (NewReg) { + MachineInstr *NewDebugInstr; + + auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr, + &VarLocIDs](VarLoc &VL, MachineInstr *NewDebugInstr) { + unsigned LocId = VarLocIDs.insert(VL); + + // Close this variable's previous location range. + DebugVariable V(*DebugInstr); + OpenRanges.erase(V); + + OpenRanges.insert(LocId, VL.Var); + // The newly created DBG_VALUE instruction NewDebugInstr must be inserted + // after MI. Keep track of the pairing. + TransferDebugPair MIP = {&MI, NewDebugInstr}; + Transfers.push_back(MIP); + }; + + // End all previous ranges of Var. + OpenRanges.erase(VarLocIDs[OldVarID].Var); + switch (Kind) { + case TransferKind::TransferCopy: { + assert(NewReg && + "No register supplied when handling a copy of a debug value"); // Create a DBG_VALUE instruction to describe the Var in its new // register location. - NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), - DMI->isIndirectDebugValue(), NewReg, - DMI->getDebugVariable(), DMI->getDebugExpression()); - if (DMI->isIndirectDebugValue()) - NewDMI->getOperand(1).setImm(DMI->getOperand(1).getImm()); + NewDebugInstr = BuildMI( + *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), + DebugInstr->isIndirectDebugValue(), NewReg, + DebugInstr->getDebugVariable(), DebugInstr->getDebugExpression()); + if (DebugInstr->isIndirectDebugValue()) + NewDebugInstr->getOperand(1).setImm(DebugInstr->getOperand(1).getImm()); + VarLoc VL(*NewDebugInstr, LS); + ProcessVarLoc(VL, NewDebugInstr); LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: "; - NewDMI->print(dbgs(), false, false, false, TII)); - } else { + NewDebugInstr->print(dbgs(), /*IsStandalone*/false, + /*SkipOpers*/false, /*SkipDebugLoc*/false, + /*AddNewLine*/true, TII)); + return; + } + case TransferKind::TransferSpill: { // Create a DBG_VALUE instruction to describe the Var in its spilled // location. 
- unsigned SpillBase; - int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase); - auto *SpillExpr = DIExpression::prepend(DMI->getDebugExpression(), - DIExpression::NoDeref, SpillOffset); - NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase, - DMI->getDebugVariable(), SpillExpr); + VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI); + auto *SpillExpr = DIExpression::prepend(DebugInstr->getDebugExpression(), + DIExpression::ApplyOffset, + SpillLocation.SpillOffset); + NewDebugInstr = BuildMI( + *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), true, + SpillLocation.SpillBase, DebugInstr->getDebugVariable(), SpillExpr); + VarLoc VL(*NewDebugInstr, SpillLocation.SpillBase, + SpillLocation.SpillOffset, LS); + ProcessVarLoc(VL, NewDebugInstr); LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: "; - NewDMI->print(dbgs(), false, false, false, TII)); + NewDebugInstr->print(dbgs(), /*IsStandalone*/false, + /*SkipOpers*/false, /*SkipDebugLoc*/false, + /*AddNewLine*/true, TII)); + return; } - - // The newly created DBG_VALUE instruction NewDMI must be inserted after - // MI. Keep track of the pairing. - TransferDebugPair MIP = {&MI, NewDMI}; - Transfers.push_back(MIP); - - // End all previous ranges of Var. - OpenRanges.erase(VarLocIDs[OldVarID].Var); - - // Add the VarLoc to OpenRanges. - VarLoc VL(*NewDMI, LS); - unsigned LocID = VarLocIDs.insert(VL); - OpenRanges.insert(LocID, VL.Var); + case TransferKind::TransferRestore: { + assert(NewReg && + "No register supplied when handling a restore of a debug value"); + MachineFunction *MF = MI.getMF(); + DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent()); + NewDebugInstr = + BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false, + NewReg, DebugInstr->getDebugVariable(), DIB.createExpression()); + VarLoc VL(*NewDebugInstr, LS); + ProcessVarLoc(VL, NewDebugInstr); + LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: "; + NewDebugInstr->print(dbgs(), /*IsStandalone*/false, + /*SkipOpers*/false, /*SkipDebugLoc*/false, + /*AddNewLine*/true, TII)); + return; + } + } + llvm_unreachable("Invalid transfer kind"); } /// A definition of a register may mark the end of a range. -void LiveDebugValues::transferRegisterDef(MachineInstr &MI, - OpenRangesSet &OpenRanges, - const VarLocMap &VarLocIDs) { +void LiveDebugValues::transferRegisterDef( + MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, + TransferMap &Transfers, DebugParamMap &DebugEntryVals) { MachineFunction *MF = MI.getMF(); const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); @@ -461,6 +739,13 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI, } } OpenRanges.erase(KillSet, VarLocIDs); + + if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) { + auto &TM = TPC->getTM<TargetMachine>(); + if (TM.Options.EnableDebugEntryValues) + emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals, + KillSet); + } } /// Decide if @MI is a spill instruction and return true if it is. We use 2 @@ -471,24 +756,15 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI, /// other spills). We do not handle this yet (more than one memory operand). 
bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, MachineFunction *MF, unsigned &Reg) { - const MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - int FI; SmallVector<const MachineMemOperand*, 1> Accesses; // TODO: Handle multiple stores folded into one. if (!MI.hasOneMemOperand()) return false; - // To identify a spill instruction, use the same criteria as in AsmPrinter. - if (!((TII->isStoreToStackSlotPostFE(MI, FI) && - FrameInfo.isSpillSlotObjectIndex(FI)) || - (TII->hasStoreToStackSlot(MI, Accesses) && - llvm::any_of(Accesses, [&FrameInfo](const MachineMemOperand *MMO) { - return FrameInfo.isSpillSlotObjectIndex( - cast<FixedStackPseudoSourceValue>(MMO->getPseudoValue()) - ->getFrameIndex()); - })))) - return false; + if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII)) + return false; // This is not a spill instruction, since no valid size was + // returned from either function. auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) { if (!MO.isReg() || !MO.isUse()) { @@ -525,29 +801,67 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, return false; } +Optional<LiveDebugValues::VarLoc::SpillLoc> +LiveDebugValues::isRestoreInstruction(const MachineInstr &MI, + MachineFunction *MF, unsigned &Reg) { + if (!MI.hasOneMemOperand()) + return None; + + // FIXME: Handle folded restore instructions with more than one memory + // operand. + if (MI.getRestoreSize(TII)) { + Reg = MI.getOperand(0).getReg(); + return extractSpillBaseRegAndOffset(MI); + } + return None; +} + /// A spilled register may indicate that we have to end the current range of /// a variable and create a new one for the spill location. +/// A restored register may indicate the reverse situation. /// We don't want to insert any instructions in process(), so we just create /// the DBG_VALUE without inserting it and keep track of it in \p Transfers. /// It will be inserted into the BB when we're done iterating over the /// instructions. -void LiveDebugValues::transferSpillInst(MachineInstr &MI, - OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, - TransferMap &Transfers) { - unsigned Reg; +void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, + TransferMap &Transfers) { MachineFunction *MF = MI.getMF(); - if (!isSpillInstruction(MI, MF, Reg)) - return; + TransferKind TKind; + unsigned Reg; + Optional<VarLoc::SpillLoc> Loc; - // Check if the register is the location of a debug value. + LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump();); + + if (isSpillInstruction(MI, MF, Reg)) { + TKind = TransferKind::TransferSpill; + LLVM_DEBUG(dbgs() << "Recognized as spill: "; MI.dump();); + LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI) + << "\n"); + } else { + if (!(Loc = isRestoreInstruction(MI, MF, Reg))) + return; + TKind = TransferKind::TransferRestore; + LLVM_DEBUG(dbgs() << "Recognized as restore: "; MI.dump();); + LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI) + << "\n"); + } + // Check if the register or spill location is the location of a debug value. 
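The matching loop below applies two different rules; restated as a single predicate, a hedged sketch in terms of this file's VarLoc and TransferKind types (matchesTransfer itself is hypothetical):

    // A spill transfer matches when the open location is the register being
    // spilled; a restore matches when it is the exact spill slot reloaded.
    static bool matchesTransfer(const VarLoc &VL, TransferKind Kind,
                                unsigned Reg,
                                const Optional<VarLoc::SpillLoc> &Slot) {
      if (Kind == TransferKind::TransferSpill)
        return VL.isDescribedByReg() == Reg;
      // TransferKind::TransferRestore: compare against the spill slot.
      return Slot && VL.Loc.SpillLocation == *Slot;
    }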
for (unsigned ID : OpenRanges.getVarLocs()) { - if (VarLocIDs[ID].isDescribedByReg() == Reg) { + if (TKind == TransferKind::TransferSpill && + VarLocIDs[ID].isDescribedByReg() == Reg) { LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '(' << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); - insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID); - return; - } + } else if (TKind == TransferKind::TransferRestore && + VarLocIDs[ID].Loc.SpillLocation == *Loc) { + LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '(' + << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); + } else + continue; + insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, TKind, + Reg); + return; } } @@ -585,7 +899,7 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, for (unsigned ID : OpenRanges.getVarLocs()) { if (VarLocIDs[ID].isDescribedByReg() == SrcReg) { insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, - DestReg); + TransferKind::TransferCopy, DestReg); return; } } @@ -612,20 +926,92 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI, }); VarLocSet &VLS = OutLocs[CurMBB]; Changed = VLS |= OpenRanges.getVarLocs(); + // New OutLocs set may be different due to spill, restore or register + // copy instruction processing. + if (Changed) + VLS = OpenRanges.getVarLocs(); OpenRanges.clear(); return Changed; } +/// Accumulate a mapping between each DILocalVariable fragment and other +/// fragments of that DILocalVariable which overlap. This reduces work during +/// the data-flow stage from "Find any overlapping fragments" to "Check if the +/// known-to-overlap fragments are present". +/// \param MI A previously unprocessed DEBUG_VALUE instruction to analyze for +/// fragment usage. +/// \param SeenFragments Map from DILocalVariable to all fragments of that +/// Variable which are known to exist. +/// \param OverlappingFragments The overlap map being constructed, from one +/// Var/Fragment pair to a vector of fragments known to overlap. +void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, + VarToFragments &SeenFragments, + OverlapMap &OverlappingFragments) { + DebugVariable MIVar(MI); + FragmentInfo ThisFragment = MIVar.getFragmentDefault(); + + // If this is the first sighting of this variable, then we are guaranteed + // there are currently no overlapping fragments either. Initialize the set + // of seen fragments, record no overlaps for the current one, and return. + auto SeenIt = SeenFragments.find(MIVar.getVar()); + if (SeenIt == SeenFragments.end()) { + SmallSet<FragmentInfo, 4> OneFragment; + OneFragment.insert(ThisFragment); + SeenFragments.insert({MIVar.getVar(), OneFragment}); + + OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}}); + return; + } + + // If this particular Variable/Fragment pair already exists in the overlap + // map, it has already been accounted for. + auto IsInOLapMap = + OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}}); + if (!IsInOLapMap.second) + return; + + auto &ThisFragmentsOverlaps = IsInOLapMap.first->second; + auto &AllSeenFragments = SeenIt->second; + + // Otherwise, examine all other seen fragments for this variable, with "this" + // fragment being a previously unseen fragment. Record any pair of + // overlapping fragments. + for (auto &ASeenFragment : AllSeenFragments) { + // Does this previously seen fragment overlap? 
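For intuition, the overlap test used below is plain interval intersection on the fragments' bit ranges; a minimal equivalent of DIExpression::fragmentsOverlap, assuming FragmentInfo's SizeInBits/OffsetInBits fields:

    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    static bool fragmentsIntersect(const DIExpression::FragmentInfo &A,
                                   const DIExpression::FragmentInfo &B) {
      // Half-open bit ranges [Offset, Offset + Size) overlap iff each one
      // starts before the other one ends.
      return A.OffsetInBits < B.OffsetInBits + B.SizeInBits &&
             B.OffsetInBits < A.OffsetInBits + A.SizeInBits;
    }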
+ if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) { + // Yes: Mark the current fragment as being overlapped. + ThisFragmentsOverlaps.push_back(ASeenFragment); + // Mark the previously seen fragment as being overlapped by the current + // one. + auto ASeenFragmentsOverlaps = + OverlappingFragments.find({MIVar.getVar(), ASeenFragment}); + assert(ASeenFragmentsOverlaps != OverlappingFragments.end() && + "Previously seen var fragment has no vector of overlaps"); + ASeenFragmentsOverlaps->second.push_back(ThisFragment); + } + } + + AllSeenFragments.insert(ThisFragment); +} + /// This routine creates OpenRanges and OutLocs. bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, - TransferMap &Transfers, bool transferChanges) { + TransferMap &Transfers, DebugParamMap &DebugEntryVals, + bool transferChanges, + OverlapMap &OverlapFragments, + VarToFragments &SeenFragments) { bool Changed = false; transferDebugValue(MI, OpenRanges, VarLocIDs); - transferRegisterDef(MI, OpenRanges, VarLocIDs); + transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers, + DebugEntryVals); if (transferChanges) { transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); - transferSpillInst(MI, OpenRanges, VarLocIDs, Transfers); + transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers); + } else { + // Build up a map of overlapping fragments on the first run through. + if (MI.isDebugValue()) + accumulateFragmentMap(MI, SeenFragments, OverlapFragments); } Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs); return Changed; @@ -713,13 +1099,23 @@ bool LiveDebugValues::join( // new range is started for the var from the mbb's beginning by inserting // a new DBG_VALUE. process() will end this range however appropriate. const VarLoc &DiffIt = VarLocIDs[ID]; - const MachineInstr *DMI = &DiffIt.MI; - MachineInstr *MI = - BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(), - DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), - DMI->getDebugVariable(), DMI->getDebugExpression()); - if (DMI->isIndirectDebugValue()) - MI->getOperand(1).setImm(DMI->getOperand(1).getImm()); + const MachineInstr *DebugInstr = &DiffIt.MI; + MachineInstr *MI = nullptr; + if (DiffIt.isConstant()) { + MachineOperand MO(DebugInstr->getOperand(0)); + MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(), + DebugInstr->getDesc(), false, MO, + DebugInstr->getDebugVariable(), + DebugInstr->getDebugExpression()); + } else { + MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(), + DebugInstr->getDesc(), DebugInstr->isIndirectDebugValue(), + DebugInstr->getOperand(0).getReg(), + DebugInstr->getDebugVariable(), + DebugInstr->getDebugExpression()); + if (DebugInstr->isIndirectDebugValue()) + MI->getOperand(1).setImm(DebugInstr->getOperand(1).getImm()); + } LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump();); ILS.set(ID); ++NumInserted; @@ -737,11 +1133,15 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { bool OLChanged = false; bool MBBJoined = false; - VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors. - OpenRangesSet OpenRanges; // Ranges that are open until end of bb. - VarLocInMBB OutLocs; // Ranges that exist beyond bb. - VarLocInMBB InLocs; // Ranges that are incoming after joining. - TransferMap Transfers; // DBG_VALUEs associated with spills. + VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors. 
+ OverlapMap OverlapFragments; // Map of overlapping variable fragments + OpenRangesSet OpenRanges(OverlapFragments); + // Ranges that are open until end of bb. + VarLocInMBB OutLocs; // Ranges that exist beyond bb. + VarLocInMBB InLocs; // Ranges that are incoming after joining. + TransferMap Transfers; // DBG_VALUEs associated with spills. + + VarToFragments SeenFragments; // Blocks which are artificial, i.e. blocks which exclusively contain // instructions without locations, or with line 0 locations. @@ -758,15 +1158,61 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { enum : bool { dontTransferChanges = false, transferChanges = true }; + // Besides parameter's modification, check whether a DBG_VALUE is inlined + // in order to deduce whether the variable that it tracks comes from + // a different function. If that is the case we can't track its entry value. + auto IsUnmodifiedFuncParam = [&](const MachineInstr &MI) { + auto *DIVar = MI.getDebugVariable(); + return DIVar->isParameter() && DIVar->isNotModified() && + !MI.getDebugLoc()->getInlinedAt(); + }; + + const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + unsigned FP = TRI->getFrameRegister(MF); + auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool { + return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP; + }; + + // Working set of currently collected debug variables mapped to DBG_VALUEs + // representing candidates for production of debug entry values. + DebugParamMap DebugEntryVals; + + MachineBasicBlock &First_MBB = *(MF.begin()); + // Only in the case of entry MBB collect DBG_VALUEs representing + // function parameters in order to generate debug entry values for them. + // Currently, we generate debug entry values only for parameters that are + // unmodified throughout the function and located in a register. + // TODO: Add support for parameters that are described as fragments. + // TODO: Add support for modified arguments that can be expressed + // by using its entry value. + // TODO: Add support for local variables that are expressed in terms of + // parameters entry values. + for (auto &MI : First_MBB) + if (MI.isDebugValue() && IsUnmodifiedFuncParam(MI) && + !MI.isIndirectDebugValue() && IsRegOtherThanSPAndFP(MI.getOperand(0)) && + !DebugEntryVals.count(MI.getDebugVariable()) && + !MI.getDebugExpression()->isFragment()) + DebugEntryVals[MI.getDebugVariable()] = &MI; + // Initialize every mbb with OutLocs. // We are not looking at any spill instructions during the initial pass // over the BBs. The LiveDebugVariables pass has already created DBG_VALUE // instructions for spills of registers that are known to be user variables // within the BB in which the spill occurs. - for (auto &MBB : MF) - for (auto &MI : MBB) - process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, - dontTransferChanges); + for (auto &MBB : MF) { + for (auto &MI : MBB) { + process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, DebugEntryVals, + dontTransferChanges, OverlapFragments, SeenFragments); + } + // Add any entry DBG_VALUE instructions necessitated by parameter + // clobbering. 
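For reference, the conditions that admitted a DBG_VALUE into DebugEntryVals above, folded into one predicate (a hedged sketch; the pass itself spreads these checks across two lambdas and the loop condition):

    // Candidates directly name a register other than SP/FP, describe an
    // unmodified, non-inlined parameter, and carry no fragment expression.
    // Hypothetical helper; not part of this patch.
    static bool isEntryValueCandidate(const MachineInstr &MI, unsigned SP,
                                      unsigned FP) {
      if (!MI.isDebugValue() || MI.isIndirectDebugValue())
        return false;
      const DILocalVariable *Var = MI.getDebugVariable();
      const MachineOperand &Op = MI.getOperand(0);
      return Var->isParameter() && Var->isNotModified() &&
             !MI.getDebugLoc()->getInlinedAt() && Op.isReg() &&
             Op.getReg() != SP && Op.getReg() != FP &&
             !MI.getDebugExpression()->isFragment();
    }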
+    for (auto &TR : Transfers) {
+      MBB.insertAfter(MachineBasicBlock::iterator(*TR.TransferInst),
+                      TR.DebugInst);
+    }
+    Transfers.clear();
+  }

   auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
     if (const DebugLoc &DL = MI.getDebugLoc())
@@ -812,8 +1258,10 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
       // examine spill instructions to see whether they spill registers that
       // correspond to user variables.
       for (auto &MI : *MBB)
-        OLChanged |= process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
-                             transferChanges);
+        OLChanged |=
+            process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
+                    DebugEntryVals, transferChanges, OverlapFragments,
+                    SeenFragments);

       // Add any DBG_VALUE instructions necessitated by spills.
       for (auto &TR : Transfers)
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index d0d889782a35..656ec7d4bdfd 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -1,9 +1,8 @@
 //===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -71,6 +71,7 @@
 EnableLDV("live-debug-variables", cl::init(true),
           cl::desc("Enable the live debug variables pass"), cl::Hidden);

 STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
+STATISTIC(NumInsertedDebugLabels, "Number of DBG_LABELs inserted");

 char LiveDebugVariables::ID = 0;
@@ -166,10 +167,6 @@ class UserValue {
   /// Map of slot indices where this value is live.
   LocMap locInts;

-  /// Set of interval start indexes that have been trimmed to the
-  /// lexical scope.
-  SmallSet<SlotIndex, 2> trimmedDefs;
-
   /// Insert a DBG_VALUE into MBB at Idx for LocNo.
   void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
                         SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled,
@@ -339,6 +336,37 @@ public:
   void print(raw_ostream &, const TargetRegisterInfo *);
 };

+/// A user label is a debug info label (DILabel) from the source program,
+/// tracked so that a DBG_LABEL can be re-emitted for it after regalloc.
+class UserLabel {
+  const DILabel *Label;      ///< The debug info label we track.
+  DebugLoc dl;               ///< The debug location for the label. This is
+                             ///< used by the DWARF writer to find the
+                             ///< lexical scope.
+  SlotIndex loc;             ///< Slot used by the debug label.
+
+  /// Insert a DBG_LABEL into MBB at Idx.
+  void insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
+                        LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+public:
+  /// Create a new UserLabel.
+  UserLabel(const DILabel *label, DebugLoc L, SlotIndex Idx)
+      : Label(label), dl(std::move(L)), loc(Idx) {}
+
+  /// Does this UserLabel match the parameters?
+  bool match(const DILabel *L, const DILocation *IA,
+             const SlotIndex Index) const {
+    return Label == L && dl->getInlinedAt() == IA && loc == Index;
+  }
+
+  /// Recreate DBG_LABEL instruction from data structures.
+ void emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII); + + /// Return DebugLoc of this UserLabel. + DebugLoc getDebugLoc() { return dl; } + + void print(raw_ostream &, const TargetRegisterInfo *); +}; + /// Implementation of the LiveDebugVariables pass. class LDVImpl { LiveDebugVariables &pass; @@ -356,6 +384,9 @@ class LDVImpl { /// All allocated UserValue instances. SmallVector<std::unique_ptr<UserValue>, 8> userValues; + /// All allocated UserLabel instances. + SmallVector<std::unique_ptr<UserLabel>, 2> userLabels; + /// Map virtual register to eq class leader. using VRMap = DenseMap<unsigned, UserValue *>; VRMap virtRegToEqClass; @@ -379,6 +410,14 @@ class LDVImpl { /// \returns True if the DBG_VALUE instruction should be deleted. bool handleDebugValue(MachineInstr &MI, SlotIndex Idx); + /// Add DBG_LABEL instruction to UserLabel. + /// + /// \param MI DBG_LABEL instruction + /// \param Idx Last valid SlotIndex before instruction. + /// + /// \returns True if the DBG_LABEL instruction should be deleted. + bool handleDebugLabel(MachineInstr &MI, SlotIndex Idx); + /// Collect and erase all DBG_VALUE instructions, adding a UserValue def /// for each instruction. /// @@ -400,6 +439,7 @@ public: void clear() { MF = nullptr; userValues.clear(); + userLabels.clear(); virtRegToEqClass.clear(); userVarMap.clear(); // Make sure we call emitDebugValues if the machine function was modified. @@ -445,13 +485,23 @@ static void printDebugLoc(const DebugLoc &DL, raw_ostream &CommentOS, CommentOS << " ]"; } -static void printExtendedName(raw_ostream &OS, const DILocalVariable *V, +static void printExtendedName(raw_ostream &OS, const DINode *Node, const DILocation *DL) { - const LLVMContext &Ctx = V->getContext(); - StringRef Res = V->getName(); + const LLVMContext &Ctx = Node->getContext(); + StringRef Res; + unsigned Line; + if (const auto *V = dyn_cast<const DILocalVariable>(Node)) { + Res = V->getName(); + Line = V->getLine(); + } else if (const auto *L = dyn_cast<const DILabel>(Node)) { + Res = L->getName(); + Line = L->getLine(); + } + if (!Res.empty()) - OS << Res << "," << V->getLine(); - if (auto *InlinedAt = DL->getInlinedAt()) { + OS << Res << "," << Line; + auto *InlinedAt = DL ? 
DL->getInlinedAt() : nullptr; + if (InlinedAt) { if (DebugLoc InlinedAtDL = InlinedAt) { OS << " @["; printDebugLoc(InlinedAtDL, OS, Ctx); @@ -461,9 +511,8 @@ static void printExtendedName(raw_ostream &OS, const DILocalVariable *V, } void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { - auto *DV = cast<DILocalVariable>(Variable); OS << "!\""; - printExtendedName(OS, DV, dl); + printExtendedName(OS, Variable, dl); OS << "\"\t"; for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) { @@ -483,10 +532,22 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { OS << '\n'; } +void UserLabel::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { + OS << "!\""; + printExtendedName(OS, Label, dl); + + OS << "\"\t"; + OS << loc; + OS << '\n'; +} + void LDVImpl::print(raw_ostream &OS) { OS << "********** DEBUG VARIABLES **********\n"; - for (unsigned i = 0, e = userValues.size(); i != e; ++i) - userValues[i]->print(OS, TRI); + for (auto &userValue : userValues) + userValue->print(OS, TRI); + OS << "********** DEBUG LABELS **********\n"; + for (auto &userLabel : userLabels) + userLabel->print(OS, TRI); } #endif @@ -556,7 +617,7 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { } else { // The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg // is defined dead at Idx (where Idx is the slot index for the instruction - // preceeding the DBG_VALUE). + // preceding the DBG_VALUE). const LiveInterval &LI = LIS->getInterval(Reg); LiveQueryResult LRQ = LI.Query(Idx); if (!LRQ.valueOutOrDead()) { @@ -587,6 +648,29 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { return true; } +bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) { + // DBG_LABEL label + if (MI.getNumOperands() != 1 || !MI.getOperand(0).isMetadata()) { + LLVM_DEBUG(dbgs() << "Can't handle " << MI); + return false; + } + + // Get or create the UserLabel for label here. + const DILabel *Label = MI.getDebugLabel(); + const DebugLoc &DL = MI.getDebugLoc(); + bool Found = false; + for (auto const &L : userLabels) { + if (L->match(Label, DL->getInlinedAt(), Idx)) { + Found = true; + break; + } + } + if (!Found) + userLabels.push_back(llvm::make_unique<UserLabel>(Label, DL, Idx)); + + return true; +} + bool LDVImpl::collectDebugValues(MachineFunction &mf) { bool Changed = false; for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE; @@ -610,7 +694,8 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { do { // Only handle DBG_VALUE in handleDebugValue(). Skip all other // kinds of debug instructions. - if (MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) { + if ((MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) || + (MBBI->isDebugLabel() && handleDebugLabel(*MBBI, Idx))) { MBBI = MBB->erase(MBBI); Changed = true; } else @@ -655,10 +740,8 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, } // Limited by the next def. - if (I.valid() && I.start() < Stop) { + if (I.valid() && I.start() < Stop) Stop = I.start(); - ToEnd = false; - } // Limited by VNI's live range. else if (!ToEnd && Kills) Kills->push_back(Stop); @@ -826,8 +909,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, ++I; // If the interval also overlaps the start of the "next" (i.e. - // current) range create a new interval for the remainder (which - // may be further trimmed). 
+ // current) range create a new interval for the remainder if (RStart < IStop) I.insert(RStart, IStop, Loc); } @@ -837,13 +919,6 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, if (!I.valid()) return; - if (I.start() < RStart) { - // Interval start overlaps range - trim to the scope range. - I.setStartUnchecked(RStart); - // Remember that this interval was trimmed. - trimmedDefs.insert(RStart); - } - // The end of a lexical scope range is the last instruction in the // range. To convert to an interval we need the index of the // instruction after it. @@ -1227,11 +1302,13 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, // that the original virtual register was a pointer. Also, add the stack slot // offset for the spilled register to the expression. const DIExpression *Expr = Expression; + uint8_t DIExprFlags = DIExpression::ApplyOffset; bool IsIndirect = Loc.wasIndirect(); if (Spilled) { - auto Deref = IsIndirect ? DIExpression::WithDeref : DIExpression::NoDeref; + if (IsIndirect) + DIExprFlags |= DIExpression::DerefAfter; Expr = - DIExpression::prepend(Expr, DIExpression::NoDeref, SpillOffset, Deref); + DIExpression::prepend(Expr, DIExprFlags, SpillOffset); IsIndirect = true; } @@ -1247,6 +1324,15 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, } while (I != MBB->end()); } +void UserLabel::insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx, + LiveIntervals &LIS, + const TargetInstrInfo &TII) { + MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); + ++NumInsertedDebugLabels; + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_LABEL)) + .addMetadata(Label); +} + void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, @@ -1262,12 +1348,6 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, bool Spilled = SpillIt != SpillOffsets.end(); unsigned SpillOffset = Spilled ? SpillIt->second : 0; - // If the interval start was trimmed to the lexical scope insert the - // DBG_VALUE at the previous index (otherwise it appears after the - // first instruction in the range). 
- if (trimmedDefs.count(Start)) - Start = Start.getPrevIndex(); - LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo()); MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); @@ -1295,16 +1375,31 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, } } +void UserLabel::emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII) { + LLVM_DEBUG(dbgs() << "\t" << loc); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(loc)->getIterator(); + + LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB)); + insertDebugLabel(&*MBB, loc, LIS, TII); + + LLVM_DEBUG(dbgs() << '\n'); +} + void LDVImpl::emitDebugValues(VirtRegMap *VRM) { LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); if (!MF) return; const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); SpillOffsetMap SpillOffsets; - for (unsigned i = 0, e = userValues.size(); i != e; ++i) { - LLVM_DEBUG(userValues[i]->print(dbgs(), TRI)); - userValues[i]->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets); - userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets); + for (auto &userValue : userValues) { + LLVM_DEBUG(userValue->print(dbgs(), TRI)); + userValue->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets); + userValue->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets); + } + LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG LABELS **********\n"); + for (auto &userLabel : userLabels) { + LLVM_DEBUG(userLabel->print(dbgs(), TRI)); + userLabel->emitDebugLabel(*LIS, *TII); } EmitDone = true; } diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index 0060399c2b04..0cbe10c6a422 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -1,9 +1,8 @@ //===- LiveDebugVariables.h - Tracking debug info variables -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 2340b6abd87c..70b2a77fe800 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -1,9 +1,8 @@ //===- LiveInterval.cpp - Live Interval Representation --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -297,9 +296,7 @@ private:
   iterator find(SlotIndex Pos) { return LR->find(Pos); }

-  iterator findInsertPos(Segment S) {
-    return std::upper_bound(LR->begin(), LR->end(), S.start);
-  }
+  iterator findInsertPos(Segment S) { return llvm::upper_bound(*LR, S.start); }
 };

 //===----------------------------------------------------------------------===//
@@ -880,8 +877,53 @@ void LiveInterval::clearSubRanges() {
   SubRanges = nullptr;
 }

-void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
-    LaneBitmask LaneMask, std::function<void(LiveInterval::SubRange&)> Apply) {
+/// For each VNI in \p SR, check whether or not that value defines part
+/// of the mask described by \p LaneMask and, if not, remove that value
+/// from \p SR.
+static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
+                                       LaneBitmask LaneMask,
+                                       const SlotIndexes &Indexes,
+                                       const TargetRegisterInfo &TRI) {
+  // Phys reg should not be tracked at subreg level.
+  // Same for noreg (Reg == 0).
+  if (!TargetRegisterInfo::isVirtualRegister(Reg) || !Reg)
+    return;
+  // Remove the values that don't define those lanes.
+  SmallVector<VNInfo *, 8> ToBeRemoved;
+  for (VNInfo *VNI : SR.valnos) {
+    if (VNI->isUnused())
+      continue;
+    // PHI definitions don't have MI attached, so there is nothing
+    // we can use to strip the VNI.
+    if (VNI->isPHIDef())
+      continue;
+    const MachineInstr *MI = Indexes.getInstructionFromIndex(VNI->def);
+    assert(MI && "Cannot find the definition of a value");
+    bool hasDef = false;
+    for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
+      if (!MOI->isReg() || !MOI->isDef())
+        continue;
+      if (MOI->getReg() != Reg)
+        continue;
+      if ((TRI.getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none())
+        continue;
+      hasDef = true;
+      break;
+    }
+
+    if (!hasDef)
+      ToBeRemoved.push_back(VNI);
+  }
+  for (VNInfo *VNI : ToBeRemoved)
+    SR.removeValNo(VNI);
+
+  assert(!SR.empty() && "At least one value should be defined by this mask");
+}
+
+void LiveInterval::refineSubRanges(
+    BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
+    std::function<void(LiveInterval::SubRange &)> Apply,
+    const SlotIndexes &Indexes, const TargetRegisterInfo &TRI) {
   LaneBitmask ToApply = LaneMask;
   for (SubRange &SR : subranges()) {
     LaneBitmask SRMask = SR.LaneMask;
@@ -899,6 +941,10 @@ void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
       SR.LaneMask = SRMask & ~Matching;
       // Create a new subrange for the matching part.
       MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
+      // Now that the subrange is split in half, make sure that each half
+      // only keeps the VNIs that touch it.
+      stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI);
+      stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI);
     }
     Apply(*MatchingRange);
     ToApply &= ~Matching;
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index 36428e0335f9..43fa8f2d7157 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -1,9 +1,8 @@
 //===- LiveIntervalUnion.cpp - Live interval union data structure ---------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/LiveIntervals.cpp b/lib/CodeGen/LiveIntervals.cpp index 471775f8706b..aa85569063b3 100644 --- a/lib/CodeGen/LiveIntervals.cpp +++ b/lib/CodeGen/LiveIntervals.cpp @@ -1,9 +1,8 @@ //===- LiveIntervals.cpp - Live Interval Analysis -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -901,8 +900,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI, // We are going to enumerate all the register mask slots contained in LI. // Start with a binary search of RegMaskSlots to find a starting point. - ArrayRef<SlotIndex>::iterator SlotI = - std::lower_bound(Slots.begin(), Slots.end(), LiveI->start); + ArrayRef<SlotIndex>::iterator SlotI = llvm::lower_bound(Slots, LiveI->start); ArrayRef<SlotIndex>::iterator SlotE = Slots.end(); // No slots in range, LI begins after the last call. @@ -1371,8 +1369,7 @@ private: void updateRegMaskSlots() { SmallVectorImpl<SlotIndex>::iterator RI = - std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), - OldIdx); + llvm::lower_bound(LIS.RegMaskSlots, OldIdx); assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() && "No RegMask at OldIdx."); *RI = NewIdx.getRegSlot(); diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp index 619643acb6d3..cd3d248ac878 100644 --- a/lib/CodeGen/LivePhysRegs.cpp +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -1,9 +1,8 @@ //===--- LivePhysRegs.cpp - Live Physical Register Set --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index 70e135ab1aff..d670f28df6ba 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -1,9 +1,8 @@ //===- LiveRangeCalc.cpp - Calculate live ranges --------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -96,10 +95,11 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { } LI.refineSubRanges(*Alloc, SubMask, - [&MO, this](LiveInterval::SubRange &SR) { - if (MO.isDef()) - createDeadDef(*Indexes, *Alloc, SR, MO); - }); + [&MO, this](LiveInterval::SubRange &SR) { + if (MO.isDef()) + createDeadDef(*Indexes, *Alloc, SR, MO); + }, + *Indexes, TRI); } // Create the def in the main liverange. We do not have to do this if diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 9f226b154a67..11aea5a3b016 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -1,9 +1,8 @@ //===- LiveRangeCalc.h - Calculate live ranges ------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 8dfe8b68c3af..882e562ba95c 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -1,9 +1,8 @@ //===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -232,6 +231,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, return false; LLVM_DEBUG(dbgs() << " folded: " << *FoldMI); LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI); + if (UseMI->isCall()) + UseMI->getMF()->updateCallSiteInfo(UseMI, FoldMI); UseMI->eraseFromParent(); DefMI->addRegisterDead(LI->reg, nullptr); Dead.push_back(DefMI); diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp index f75d513c89f5..8818f1ce0ad9 100644 --- a/lib/CodeGen/LiveRangeShrink.cpp +++ b/lib/CodeGen/LiveRangeShrink.cpp @@ -1,9 +1,8 @@ //===- LiveRangeShrink.cpp - Move instructions to shrink live range -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // ///===---------------------------------------------------------------------===// /// diff --git a/lib/CodeGen/LiveRangeUtils.h b/lib/CodeGen/LiveRangeUtils.h index bd57609c3d84..0e6bfeb0d4a5 100644 --- a/lib/CodeGen/LiveRangeUtils.h +++ b/lib/CodeGen/LiveRangeUtils.h @@ -1,9 +1,8 @@ //===-- LiveRangeUtils.h - Live Range modification utilities ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index e72977b02675..ce99e5535c25 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -1,9 +1,8 @@ //===- LiveRegMatrix.cpp - Track register interference --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp index c22681385492..6afb7fb7aa11 100644 --- a/lib/CodeGen/LiveRegUnits.cpp +++ b/lib/CodeGen/LiveRegUnits.cpp @@ -1,9 +1,8 @@ //===- LiveRegUnits.cpp - Register Unit Set -------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -126,13 +125,15 @@ void LiveRegUnits::addPristines(const MachineFunction &MF) { void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - if (!MBB.succ_empty()) { - addPristines(MF); - // To get the live-outs we simply merge the live-ins of all successors. - for (const MachineBasicBlock *Succ : MBB.successors()) - addBlockLiveIns(*this, *Succ); - } else if (MBB.isReturnBlock()) { - // For the return block: Add all callee saved registers. + + addPristines(MF); + + // To get the live-outs we simply merge the live-ins of all successors. + for (const MachineBasicBlock *Succ : MBB.successors()) + addBlockLiveIns(*this, *Succ); + + // For the return block: Add all callee saved registers. + if (MBB.isReturnBlock()) { const MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.isCalleeSavedInfoValid()) addCalleeSavedRegs(*this, MF); diff --git a/lib/CodeGen/LiveStacks.cpp b/lib/CodeGen/LiveStacks.cpp index 80ecfdb7a507..f55977d72723 100644 --- a/lib/CodeGen/LiveStacks.cpp +++ b/lib/CodeGen/LiveStacks.cpp @@ -1,9 +1,8 @@ //===-- LiveStacks.cpp - Live Stack Slot Analysis -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 0b92eab83806..aaff982ef1b0 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -1,9 +1,8 @@ //===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -401,7 +400,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { true/*IsImp*/, true/*IsKill*/)); else { MachineOperand *MO = - LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI); + LastRefOrPartRef->findRegisterDefOperand(Reg, false, false, TRI); bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg; // If the last reference is the last def, then it's not used at all. // That is, unless we are currently processing the last reference itself. diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 795028e97929..b14d76a585f7 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -1,9 +1,8 @@ //===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -200,19 +199,27 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure that the stack protector comes before the local variables on the // stack. SmallSet<int, 16> ProtectedObjs; - if (MFI.getStackProtectorIndex() >= 0) { + if (MFI.hasStackProtectorIndex()) { + int StackProtectorFI = MFI.getStackProtectorIndex(); + + // We need to make sure we didn't pre-allocate the stack protector when + // doing this. + // If we already have a stack protector, this will re-assign it to a slot + // that is **not** covering the protected objects. + assert(!MFI.isObjectPreAllocated(StackProtectorFI) && + "Stack protector pre-allocated in LocalStackSlotAllocation"); + StackObjSet LargeArrayObjs; StackObjSet SmallArrayObjs; StackObjSet AddrOfObjs; - AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), Offset, - StackGrowsDown, MaxAlign); + AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown, MaxAlign); // Assign large stack objects first. 
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { if (MFI.isDeadObjectIndex(i)) continue; - if (MFI.getStackProtectorIndex() == (int)i) + if (StackProtectorFI == (int)i) continue; switch (MFI.getObjectSSPLayout(i)) { diff --git a/lib/CodeGen/LoopTraversal.cpp b/lib/CodeGen/LoopTraversal.cpp index a02d10e09d7d..9490dfc40a82 100644 --- a/lib/CodeGen/LoopTraversal.cpp +++ b/lib/CodeGen/LoopTraversal.cpp @@ -1,9 +1,8 @@ //===- LoopTraversal.cpp - Optimal basic block traversal order --*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/LowLevelType.cpp b/lib/CodeGen/LowLevelType.cpp index 1c682e72fa49..ca0daa14fedf 100644 --- a/lib/CodeGen/LowLevelType.cpp +++ b/lib/CodeGen/LowLevelType.cpp @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/LowLevelType.cpp -------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp index 36c1d358a9bd..c8cf6abda4fc 100644 --- a/lib/CodeGen/LowerEmuTLS.cpp +++ b/lib/CodeGen/LowerEmuTLS.cpp @@ -1,9 +1,8 @@ //===- LowerEmuTLS.cpp - Add __emutls_[vt].* variables --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp index f17c23619ed5..f49bc854e23f 100644 --- a/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -1,9 +1,8 @@ //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -105,6 +104,8 @@ INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", "Rename Register Operands Canonically", false, false) static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { + if (MF.empty()) + return {}; ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); std::vector<MachineBasicBlock *> RPOList; for (auto MBB : RPOT) { @@ -179,6 +180,8 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, } std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers; + std::map<unsigned, MachineInstr *> MultiUserLookup; + unsigned UseToBringDefCloserToCount = 0; std::vector<MachineInstr *> PseudoIdempotentInstructions; std::vector<unsigned> PhysRegDefs; for (auto *II : Instructions) { @@ -254,6 +257,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, if (Delta < Distance) { Distance = Delta; UseToBringDefCloserTo = UseInst; + MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo; } } @@ -293,11 +297,11 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, } // Sort the defs for users of multiple defs lexographically. - for (const auto &E : MultiUsers) { + for (const auto &E : MultiUserLookup) { auto UseI = std::find_if(MBB->instr_begin(), MBB->instr_end(), - [&](MachineInstr &MI) -> bool { return &MI == E.first; }); + [&](MachineInstr &MI) -> bool { return &MI == E.second; }); if (UseI == MBB->instr_end()) continue; @@ -305,7 +309,8 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, LLVM_DEBUG( dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); Changed |= rescheduleLexographically( - E.second, MBB, [&]() -> MachineBasicBlock::iterator { return UseI; }); + MultiUsers[E.second], MBB, + [&]() -> MachineBasicBlock::iterator { return UseI; }); } PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); @@ -342,15 +347,23 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) { continue; if (!TargetRegisterInfo::isVirtualRegister(Src)) continue; + // Not folding COPY instructions if regbankselect has not set the RCs. + // Why are we only considering Register Classes? Because the verifier + // sometimes gets upset if the register classes don't match even if the + // types do. A future patch might add COPY folding for matching types in + // pre-registerbankselect code. 
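One subtlety in the folding code below: setReg() moves an operand from Dst's use list onto Src's, so iterating Dst's uses while rewriting them would be unsound, which is why the uses are snapshotted into a vector first. The same pattern in isolation (sketch, assuming MRI/Dst/Src from the surrounding function):

    // Snapshot first, then mutate: each setReg() re-links the operand onto
    // Src's use list, invalidating a live use_iterator on Dst.
    SmallVector<MachineOperand *, 8> Uses;
    for (MachineOperand &MO : MRI.use_operands(Dst))
      Uses.push_back(&MO);
    for (MachineOperand *MO : Uses)
      MO->setReg(Src);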
+ if (!MRI.getRegClassOrNull(Dst)) + continue; if (MRI.getRegClass(Dst) != MRI.getRegClass(Src)) continue; - for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { - MachineOperand *MO = &*UI; + std::vector<MachineOperand *> Uses; + for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) + Uses.push_back(&*UI); + for (auto *MO : Uses) MO->setReg(Src); - Changed = true; - } + Changed = true; MI->eraseFromParent(); } @@ -474,18 +487,14 @@ class NamedVRegCursor { unsigned virtualVRegNumber; public: - NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI) { - unsigned VRegGapIndex = 0; - const unsigned VR_GAP = (++VRegGapIndex * 1000); - - unsigned I = MRI.createIncompleteVirtualRegister(); - const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; - - virtualVRegNumber = E; - } + NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI), virtualVRegNumber(0) {} void SkipVRegs() { unsigned VRegGapIndex = 1; + if (!virtualVRegNumber) { + VRegGapIndex = 0; + virtualVRegNumber = MRI.createIncompleteVirtualRegister(); + } const unsigned VR_GAP = (++VRegGapIndex * 1000); unsigned I = virtualVRegNumber; @@ -501,14 +510,17 @@ public: return virtualVRegNumber; } - unsigned createVirtualRegister(const TargetRegisterClass *RC) { + unsigned createVirtualRegister(unsigned VReg) { + if (!virtualVRegNumber) + SkipVRegs(); std::string S; raw_string_ostream OS(S); OS << "namedVReg" << (virtualVRegNumber & ~0x80000000); OS.flush(); virtualVRegNumber++; - - return MRI.createVirtualRegister(RC, OS.str()); + if (auto RC = MRI.getRegClassOrNull(VReg)) + return MRI.createVirtualRegister(RC, OS.str()); + return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str()); } }; } // namespace @@ -558,7 +570,7 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs, continue; } - auto Rename = NVC.createVirtualRegister(MRI.getRegClass(Reg)); + auto Rename = NVC.createVirtualRegister(Reg); if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { LLVM_DEBUG(dbgs() << "Mapping vreg ";); @@ -735,14 +747,15 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, // of the MachineBasicBlock so that they are named in the order that we sorted // them alphabetically. Eventually we won't need SkipVRegs because we will use // named vregs instead. - NVC.SkipVRegs(); + if (IdempotentInstCount) + NVC.SkipVRegs(); auto MII = MBB->begin(); for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) { MachineInstr &MI = *MII++; Changed = true; unsigned vRegToRename = MI.getOperand(0).getReg(); - auto Rename = NVC.createVirtualRegister(MRI.getRegClass(vRegToRename)); + auto Rename = NVC.createVirtualRegister(vRegToRename); std::vector<MachineOperand *> RenameMOs; for (auto &MO : MRI.reg_operands(vRegToRename)) { diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index 265877c2f5b4..4899bd3f5811 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -1,9 +1,8 @@ //===- MILexer.cpp - Machine instructions lexer implementation ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -205,6 +204,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("nuw" , MIToken::kw_nuw) .Case("nsw" , MIToken::kw_nsw) .Case("exact" , MIToken::kw_exact) + .Case("fpexcept", MIToken::kw_fpexcept) .Case("debug-location", MIToken::kw_debug_location) .Case("same_value", MIToken::kw_cfi_same_value) .Case("offset", MIToken::kw_cfi_offset) diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index ceff79087d81..0fe3f9f706db 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -1,9 +1,8 @@ //===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -74,6 +73,7 @@ struct MIToken { kw_nuw, kw_nsw, kw_exact, + kw_fpexcept, kw_debug_location, kw_cfi_same_value, kw_cfi_offset, diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index 6f2d8bb53ac8..c0b800a0b870 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -1,9 +1,8 @@ //===- MIParser.cpp - Machine instructions parser implementation ----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -11,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include "MIParser.h" +#include "llvm/CodeGen/MIRParser/MIParser.h" #include "MILexer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" @@ -27,6 +26,8 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -81,12 +82,242 @@ using namespace llvm; +void PerTargetMIParsingState::setTarget( + const TargetSubtargetInfo &NewSubtarget) { + + // If the subtarget changed, conservatively assume everything is invalid. + if (&Subtarget == &NewSubtarget) + return; + + Names2InstrOpCodes.clear(); + Names2Regs.clear(); + Names2RegMasks.clear(); + Names2SubRegIndices.clear(); + Names2TargetIndices.clear(); + Names2DirectTargetFlags.clear(); + Names2BitmaskTargetFlags.clear(); + Names2MMOTargetFlags.clear(); + + initNames2RegClasses(); + initNames2RegBanks(); +} + +void PerTargetMIParsingState::initNames2Regs() { + if (!Names2Regs.empty()) + return; + + // The '%noreg' register is the register 0.
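This routine and every other initNames2* helper moved into PerTargetMIParsingState share one pattern: the table is built lazily on first query, and setTarget() wipes it so the next query rebuilds against the new subtarget. A compressed, self-contained sketch of that pattern (the class and names here are illustrative, not the LLVM API):

    #include <string>
    #include <unordered_map>

    class LazyNameTable {
      std::unordered_map<std::string, unsigned> Names2Values;

      void init() {
        if (!Names2Values.empty())
          return;                         // already populated for this target
        Names2Values.emplace("noreg", 0); // filled from target tables on demand
      }

    public:
      // Returns true on failure, matching the convention used above.
      bool lookup(const std::string &Name, unsigned &Value) {
        init();
        auto It = Names2Values.find(Name);
        if (It == Names2Values.end())
          return true;
        Value = It->second;
        return false;
      }

      void reset() { Names2Values.clear(); } // what setTarget() does per table
    };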
+ Names2Regs.insert(std::make_pair("noreg", 0)); + const auto *TRI = Subtarget.getRegisterInfo(); + assert(TRI && "Expected target register info"); + + for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) { + bool WasInserted = + Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I)) + .second; + (void)WasInserted; + assert(WasInserted && "Expected registers to be unique case-insensitively"); + } +} + +bool PerTargetMIParsingState::getRegisterByName(StringRef RegName, + unsigned &Reg) { + initNames2Regs(); + auto RegInfo = Names2Regs.find(RegName); + if (RegInfo == Names2Regs.end()) + return true; + Reg = RegInfo->getValue(); + return false; +} + +void PerTargetMIParsingState::initNames2InstrOpCodes() { + if (!Names2InstrOpCodes.empty()) + return; + const auto *TII = Subtarget.getInstrInfo(); + assert(TII && "Expected target instruction info"); + for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I) + Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I)); +} + +bool PerTargetMIParsingState::parseInstrName(StringRef InstrName, + unsigned &OpCode) { + initNames2InstrOpCodes(); + auto InstrInfo = Names2InstrOpCodes.find(InstrName); + if (InstrInfo == Names2InstrOpCodes.end()) + return true; + OpCode = InstrInfo->getValue(); + return false; +} + +void PerTargetMIParsingState::initNames2RegMasks() { + if (!Names2RegMasks.empty()) + return; + const auto *TRI = Subtarget.getRegisterInfo(); + assert(TRI && "Expected target register info"); + ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks(); + ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames(); + assert(RegMasks.size() == RegMaskNames.size()); + for (size_t I = 0, E = RegMasks.size(); I < E; ++I) + Names2RegMasks.insert( + std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I])); +} + +const uint32_t *PerTargetMIParsingState::getRegMask(StringRef Identifier) { + initNames2RegMasks(); + auto RegMaskInfo = Names2RegMasks.find(Identifier); + if (RegMaskInfo == Names2RegMasks.end()) + return nullptr; + return RegMaskInfo->getValue(); +} + +void PerTargetMIParsingState::initNames2SubRegIndices() { + if (!Names2SubRegIndices.empty()) + return; + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I) + Names2SubRegIndices.insert( + std::make_pair(TRI->getSubRegIndexName(I), I)); +} + +unsigned PerTargetMIParsingState::getSubRegIndex(StringRef Name) { + initNames2SubRegIndices(); + auto SubRegInfo = Names2SubRegIndices.find(Name); + if (SubRegInfo == Names2SubRegIndices.end()) + return 0; + return SubRegInfo->getValue(); +} + +void PerTargetMIParsingState::initNames2TargetIndices() { + if (!Names2TargetIndices.empty()) + return; + const auto *TII = Subtarget.getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Indices = TII->getSerializableTargetIndices(); + for (const auto &I : Indices) + Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first)); +} + +bool PerTargetMIParsingState::getTargetIndex(StringRef Name, int &Index) { + initNames2TargetIndices(); + auto IndexInfo = Names2TargetIndices.find(Name); + if (IndexInfo == Names2TargetIndices.end()) + return true; + Index = IndexInfo->second; + return false; +} + +void PerTargetMIParsingState::initNames2DirectTargetFlags() { + if (!Names2DirectTargetFlags.empty()) + return; + + const auto *TII = Subtarget.getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = 
TII->getSerializableDirectMachineOperandTargetFlags(); + for (const auto &I : Flags) + Names2DirectTargetFlags.insert( + std::make_pair(StringRef(I.second), I.first)); +} + +bool PerTargetMIParsingState::getDirectTargetFlag(StringRef Name, + unsigned &Flag) { + initNames2DirectTargetFlags(); + auto FlagInfo = Names2DirectTargetFlags.find(Name); + if (FlagInfo == Names2DirectTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +void PerTargetMIParsingState::initNames2BitmaskTargetFlags() { + if (!Names2BitmaskTargetFlags.empty()) + return; + + const auto *TII = Subtarget.getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags(); + for (const auto &I : Flags) + Names2BitmaskTargetFlags.insert( + std::make_pair(StringRef(I.second), I.first)); +} + +bool PerTargetMIParsingState::getBitmaskTargetFlag(StringRef Name, + unsigned &Flag) { + initNames2BitmaskTargetFlags(); + auto FlagInfo = Names2BitmaskTargetFlags.find(Name); + if (FlagInfo == Names2BitmaskTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +void PerTargetMIParsingState::initNames2MMOTargetFlags() { + if (!Names2MMOTargetFlags.empty()) + return; + + const auto *TII = Subtarget.getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableMachineMemOperandTargetFlags(); + for (const auto &I : Flags) + Names2MMOTargetFlags.insert(std::make_pair(StringRef(I.second), I.first)); +} + +bool PerTargetMIParsingState::getMMOTargetFlag(StringRef Name, + MachineMemOperand::Flags &Flag) { + initNames2MMOTargetFlags(); + auto FlagInfo = Names2MMOTargetFlags.find(Name); + if (FlagInfo == Names2MMOTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +void PerTargetMIParsingState::initNames2RegClasses() { + if (!Names2RegClasses.empty()) + return; + + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) { + const auto *RC = TRI->getRegClass(I); + Names2RegClasses.insert( + std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC)); + } +} + +void PerTargetMIParsingState::initNames2RegBanks() { + if (!Names2RegBanks.empty()) + return; + + const RegisterBankInfo *RBI = Subtarget.getRegBankInfo(); + // If the target does not support GlobalISel, we may not have a + // register bank info. 
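These lookup helpers all report failure by returning true, and the operand parser consults the two flag namespaces in a fixed order: a name is tried as a direct target flag first and as a bitmask flag second (the parseMachineOperandAndTargetFlags hunk further down shows the call sites). A toy version of that fallback, with invented flag names and values:

    #include <cstdio>
    #include <map>
    #include <string>

    // Invented tables; real targets publish these through the
    // getSerializable*TargetFlags() hooks used above.
    static const std::map<std::string, unsigned> Direct = {{"call-target", 1}};
    static const std::map<std::string, unsigned> Bitmask = {{"lo-bits", 1u << 3}};

    // True means "not found", as in the helpers above.
    static bool getFlag(const std::map<std::string, unsigned> &Table,
                        const std::string &Name, unsigned &Flag) {
      auto It = Table.find(Name);
      if (It == Table.end())
        return true;
      Flag = It->second;
      return false;
    }

    int main() {
      unsigned TF = 0;
      std::string Name = "lo-bits";
      if (getFlag(Direct, Name, TF) && getFlag(Bitmask, Name, TF))
        std::printf("use of undefined target flag '%s'\n", Name.c_str());
      else
        std::printf("'%s' -> %u\n", Name.c_str(), TF); // 'lo-bits' -> 8
    }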
+ if (!RBI) + return; + + for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) { + const auto &RegBank = RBI->getRegBank(I); + Names2RegBanks.insert( + std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank)); + } +} + +const TargetRegisterClass * +PerTargetMIParsingState::getRegClass(StringRef Name) { + auto RegClassInfo = Names2RegClasses.find(Name); + if (RegClassInfo == Names2RegClasses.end()) + return nullptr; + return RegClassInfo->getValue(); +} + +const RegisterBank *PerTargetMIParsingState::getRegBank(StringRef Name) { + auto RegBankInfo = Names2RegBanks.find(Name); + if (RegBankInfo == Names2RegBanks.end()) + return nullptr; + return RegBankInfo->getValue(); +} + PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF, - SourceMgr &SM, const SlotMapping &IRSlots, - const Name2RegClassMap &Names2RegClasses, - const Name2RegBankMap &Names2RegBanks) - : MF(MF), SM(&SM), IRSlots(IRSlots), Names2RegClasses(Names2RegClasses), - Names2RegBanks(Names2RegBanks) { + SourceMgr &SM, const SlotMapping &IRSlots, PerTargetMIParsingState &T) + : MF(MF), SM(&SM), IRSlots(IRSlots), Target(T) { } VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) { @@ -137,26 +368,10 @@ class MIParser { StringRef Source, CurrentSource; MIToken Token; PerFunctionMIParsingState &PFS; - /// Maps from instruction names to op codes. - StringMap<unsigned> Names2InstrOpCodes; - /// Maps from register names to registers. - StringMap<unsigned> Names2Regs; - /// Maps from register mask names to register masks. - StringMap<const uint32_t *> Names2RegMasks; - /// Maps from subregister names to subregister indices. - StringMap<unsigned> Names2SubRegIndices; /// Maps from slot numbers to function's unnamed basic blocks. DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks; /// Maps from slot numbers to function's unnamed values. DenseMap<unsigned, const Value *> Slots2Values; - /// Maps from target index names to target indices. - StringMap<int> Names2TargetIndices; - /// Maps from direct target flag names to the direct target flag values. - StringMap<unsigned> Names2DirectTargetFlags; - /// Maps from direct target flag names to the bitmask target flag values. - StringMap<unsigned> Names2BitmaskTargetFlags; - /// Maps from MMO target flag names to MMO target flag values. - StringMap<MachineMemOperand::Flags> Names2MMOTargetFlags; public: MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, @@ -281,12 +496,6 @@ private: /// Otherwise return false. bool consumeIfPresent(MIToken::TokenKind TokenKind); - void initNames2InstrOpCodes(); - - /// Try to convert an instruction name to an opcode. Return true if the - /// instruction name is invalid. - bool parseInstrName(StringRef InstrName, unsigned &OpCode); - bool parseInstruction(unsigned &OpCode, unsigned &Flags); bool assignRegisterTies(MachineInstr &MI, @@ -295,62 +504,11 @@ private: bool verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands, const MCInstrDesc &MCID); - void initNames2Regs(); - - /// Try to convert a register name to a register number. Return true if the - /// register name is invalid. - bool getRegisterByName(StringRef RegName, unsigned &Reg); - - void initNames2RegMasks(); - - /// Check if the given identifier is a name of a register mask. - /// - /// Return null if the identifier isn't a register mask. - const uint32_t *getRegMask(StringRef Identifier); - - void initNames2SubRegIndices(); - - /// Check if the given identifier is a name of a subregister index. 
- /// - /// Return 0 if the name isn't a subregister index class. - unsigned getSubRegIndex(StringRef Name); - const BasicBlock *getIRBlock(unsigned Slot); const BasicBlock *getIRBlock(unsigned Slot, const Function &F); const Value *getIRValue(unsigned Slot); - void initNames2TargetIndices(); - - /// Try to convert a name of target index to the corresponding target index. - /// - /// Return true if the name isn't a name of a target index. - bool getTargetIndex(StringRef Name, int &Index); - - void initNames2DirectTargetFlags(); - - /// Try to convert a name of a direct target flag to the corresponding - /// target flag. - /// - /// Return true if the name isn't a name of a direct flag. - bool getDirectTargetFlag(StringRef Name, unsigned &Flag); - - void initNames2BitmaskTargetFlags(); - - /// Try to convert a name of a bitmask target flag to the corresponding - /// target flag. - /// - /// Return true if the name isn't a name of a bitmask target flag. - bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag); - - void initNames2MMOTargetFlags(); - - /// Try to convert a name of a MachineMemOperand target flag to the - /// corresponding target flag. - /// - /// Return true if the name isn't a name of a target MMO flag. - bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag); - /// Get or create an MCSymbol for a given name. MCSymbol *getOrCreateMCSymbol(StringRef Name); @@ -978,7 +1136,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_reassoc) || Token.is(MIToken::kw_nuw) || Token.is(MIToken::kw_nsw) || - Token.is(MIToken::kw_exact)) { + Token.is(MIToken::kw_exact) || + Token.is(MIToken::kw_fpexcept)) { // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) Flags |= MachineInstr::FrameSetup; @@ -1004,13 +1163,15 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::NoSWrap; if (Token.is(MIToken::kw_exact)) Flags |= MachineInstr::IsExact; + if (Token.is(MIToken::kw_fpexcept)) + Flags |= MachineInstr::FPExcept; lex(); } if (Token.isNot(MIToken::Identifier)) return error("expected a machine instruction"); StringRef InstrName = Token.stringValue(); - if (parseInstrName(InstrName, OpCode)) + if (PFS.Target.parseInstrName(InstrName, OpCode)) return error(Twine("unknown machine instruction name '") + InstrName + "'"); lex(); return false; @@ -1019,7 +1180,7 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { bool MIParser::parseNamedRegister(unsigned &Reg) { assert(Token.is(MIToken::NamedRegister) && "Needs NamedRegister token"); StringRef Name = Token.stringValue(); - if (getRegisterByName(Name, Reg)) + if (PFS.Target.getRegisterByName(Name, Reg)) return error(Twine("unknown register name '") + Name + "'"); return false; } @@ -1070,21 +1231,20 @@ bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) { StringRef Name = Token.stringValue(); // Was it a register class? 
- auto RCNameI = PFS.Names2RegClasses.find(Name); - if (RCNameI != PFS.Names2RegClasses.end()) { + const TargetRegisterClass *RC = PFS.Target.getRegClass(Name); + if (RC) { lex(); - const TargetRegisterClass &RC = *RCNameI->getValue(); switch (RegInfo.Kind) { case VRegInfo::UNKNOWN: case VRegInfo::NORMAL: RegInfo.Kind = VRegInfo::NORMAL; - if (RegInfo.Explicit && RegInfo.D.RC != &RC) { + if (RegInfo.Explicit && RegInfo.D.RC != RC) { const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); return error(Loc, Twine("conflicting register classes, previously: ") + Twine(TRI.getRegClassName(RegInfo.D.RC))); } - RegInfo.D.RC = &RC; + RegInfo.D.RC = RC; RegInfo.Explicit = true; return false; @@ -1098,10 +1258,9 @@ bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) { // Should be a register bank or a generic register. const RegisterBank *RegBank = nullptr; if (Name != "_") { - auto RBNameI = PFS.Names2RegBanks.find(Name); - if (RBNameI == PFS.Names2RegBanks.end()) + RegBank = PFS.Target.getRegBank(Name); + if (!RegBank) return error(Loc, "expected '_', register class, or register bank name"); - RegBank = RBNameI->getValue(); } lex(); @@ -1173,7 +1332,7 @@ bool MIParser::parseSubRegisterIndex(unsigned &SubReg) { if (Token.isNot(MIToken::Identifier)) return error("expected a subregister index after '.'"); auto Name = Token.stringValue(); - SubReg = getSubRegIndex(Name); + SubReg = PFS.Target.getSubRegIndex(Name); if (!SubReg) return error(Twine("use of unknown subregister index '") + Name + "'"); lex(); @@ -1341,6 +1500,19 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) { return false; } +// See the LLT implementation for bit size limits. +static bool verifyScalarSize(uint64_t Size) { + return Size != 0 && isUInt<16>(Size); +} + +static bool verifyVectorElementCount(uint64_t NumElts) { + return NumElts != 0 && isUInt<16>(NumElts); +} + +static bool verifyAddrSpace(uint64_t AddrSpace) { + return isUInt<24>(AddrSpace); +} + bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { if (Token.range().front() == 's' || Token.range().front() == 'p') { StringRef SizeStr = Token.range().drop_front(); @@ -1349,12 +1521,19 @@ } if (Token.range().front() == 's') { - Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue()); + auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue(); + if (!verifyScalarSize(ScalarSize)) + return error("invalid size for scalar type"); + + Ty = LLT::scalar(ScalarSize); lex(); return false; } else if (Token.range().front() == 'p') { const DataLayout &DL = MF.getDataLayout(); - unsigned AS = APSInt(Token.range().drop_front()).getZExtValue(); + uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue(); + if (!verifyAddrSpace(AS)) + return error("invalid address space number"); + Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); lex(); return false; @@ -1369,6 +1548,9 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { if (Token.isNot(MIToken::IntegerLiteral)) return error(Loc, "expected <M x sN> or <M x pA> for vector type"); uint64_t NumElements = Token.integerValue().getZExtValue(); + if (!verifyVectorElementCount(NumElements)) + return error("invalid number of vector elements"); + lex(); if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x") @@ -1381,11 +1563,17 @@ if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
return error("expected integers after 's'/'p' type character"); - if (Token.range().front() == 's') - Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue()); - else if (Token.range().front() == 'p') { + if (Token.range().front() == 's') { + auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue(); + if (!verifyScalarSize(ScalarSize)) + return error("invalid size for scalar type"); + Ty = LLT::scalar(ScalarSize); + } else if (Token.range().front() == 'p') { const DataLayout &DL = MF.getDataLayout(); - unsigned AS = APSInt(Token.range().drop_front()).getZExtValue(); + uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue(); + if (!verifyAddrSpace(AS)) + return error("invalid address space number"); + Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); } else return error(Loc, "expected <M x sN> or <M x pA> for vector type"); @@ -1625,7 +1813,7 @@ bool MIParser::parseMCSymbolOperand(MachineOperand &Dest) { bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) { assert(Token.is(MIToken::SubRegisterIndex)); StringRef Name = Token.stringValue(); - unsigned SubRegIndex = getSubRegIndex(Token.stringValue()); + unsigned SubRegIndex = PFS.Target.getSubRegIndex(Token.stringValue()); if (SubRegIndex == 0) return error(Twine("unknown subregister index '") + Name + "'"); lex(); @@ -1669,6 +1857,11 @@ bool MIParser::parseDIExpression(MDNode *&Expr) { Elements.push_back(Op); continue; } + if (unsigned Enc = dwarf::getAttributeEncoding(Token.stringValue())) { + lex(); + Elements.push_back(Enc); + continue; + } return error(Twine("invalid DWARF op '") + Token.stringValue() + "'"); } @@ -2100,7 +2293,7 @@ bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) { if (Token.isNot(MIToken::Identifier)) return error("expected the name of the target index"); int Index = 0; - if (getTargetIndex(Token.stringValue(), Index)) + if (PFS.Target.getTargetIndex(Token.stringValue(), Index)) return error("use of undefined target index '" + Token.stringValue() + "'"); lex(); if (expectAndConsume(MIToken::rparen)) @@ -2242,7 +2435,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, case MIToken::Error: return true; case MIToken::Identifier: - if (const auto *RegMask = getRegMask(Token.stringValue())) { + if (const auto *RegMask = PFS.Target.getRegMask(Token.stringValue())) { Dest = MachineOperand::CreateRegMask(RegMask); lex(); break; @@ -2268,8 +2461,8 @@ bool MIParser::parseMachineOperandAndTargetFlags( return true; if (Token.isNot(MIToken::Identifier)) return error("expected the name of the target flag"); - if (getDirectTargetFlag(Token.stringValue(), TF)) { - if (getBitmaskTargetFlag(Token.stringValue(), TF)) + if (PFS.Target.getDirectTargetFlag(Token.stringValue(), TF)) { + if (PFS.Target.getBitmaskTargetFlag(Token.stringValue(), TF)) return error("use of undefined target flag '" + Token.stringValue() + "'"); } @@ -2279,7 +2472,7 @@ bool MIParser::parseMachineOperandAndTargetFlags( if (Token.isNot(MIToken::Identifier)) return error("expected the name of the target flag"); unsigned BitFlag = 0; - if (getBitmaskTargetFlag(Token.stringValue(), BitFlag)) + if (PFS.Target.getBitmaskTargetFlag(Token.stringValue(), BitFlag)) return error("use of undefined target flag '" + Token.stringValue() + "'"); // TODO: Report an error when using a duplicate bit target flag. 
@@ -2325,6 +2518,10 @@ bool MIParser::parseAlignment(unsigned &Alignment) { if (getUnsigned(Alignment)) return true; lex(); + + if (!isPowerOf2_32(Alignment)) + return error("expected a power-of-2 literal after 'align'"); + return false; } @@ -2436,7 +2633,7 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) { break; case MIToken::StringConstant: { MachineMemOperand::Flags TF; - if (getMMOTargetFlag(Token.stringValue(), TF)) + if (PFS.Target.getMMOTargetFlag(Token.stringValue(), TF)) return error("use of undefined target MMO flag '" + Token.stringValue() + "'"); Flags |= TF; @@ -2711,87 +2908,6 @@ bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) { return false; } -void MIParser::initNames2InstrOpCodes() { - if (!Names2InstrOpCodes.empty()) - return; - const auto *TII = MF.getSubtarget().getInstrInfo(); - assert(TII && "Expected target instruction info"); - for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I) - Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I)); -} - -bool MIParser::parseInstrName(StringRef InstrName, unsigned &OpCode) { - initNames2InstrOpCodes(); - auto InstrInfo = Names2InstrOpCodes.find(InstrName); - if (InstrInfo == Names2InstrOpCodes.end()) - return true; - OpCode = InstrInfo->getValue(); - return false; -} - -void MIParser::initNames2Regs() { - if (!Names2Regs.empty()) - return; - // The '%noreg' register is the register 0. - Names2Regs.insert(std::make_pair("noreg", 0)); - const auto *TRI = MF.getSubtarget().getRegisterInfo(); - assert(TRI && "Expected target register info"); - for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) { - bool WasInserted = - Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I)) - .second; - (void)WasInserted; - assert(WasInserted && "Expected registers to be unique case-insensitively"); - } -} - -bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) { - initNames2Regs(); - auto RegInfo = Names2Regs.find(RegName); - if (RegInfo == Names2Regs.end()) - return true; - Reg = RegInfo->getValue(); - return false; -} - -void MIParser::initNames2RegMasks() { - if (!Names2RegMasks.empty()) - return; - const auto *TRI = MF.getSubtarget().getRegisterInfo(); - assert(TRI && "Expected target register info"); - ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks(); - ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames(); - assert(RegMasks.size() == RegMaskNames.size()); - for (size_t I = 0, E = RegMasks.size(); I < E; ++I) - Names2RegMasks.insert( - std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I])); -} - -const uint32_t *MIParser::getRegMask(StringRef Identifier) { - initNames2RegMasks(); - auto RegMaskInfo = Names2RegMasks.find(Identifier); - if (RegMaskInfo == Names2RegMasks.end()) - return nullptr; - return RegMaskInfo->getValue(); -} - -void MIParser::initNames2SubRegIndices() { - if (!Names2SubRegIndices.empty()) - return; - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I) - Names2SubRegIndices.insert( - std::make_pair(StringRef(TRI->getSubRegIndexName(I)).lower(), I)); -} - -unsigned MIParser::getSubRegIndex(StringRef Name) { - initNames2SubRegIndices(); - auto SubRegInfo = Names2SubRegIndices.find(Name); - if (SubRegInfo == Names2SubRegIndices.end()) - return 0; - return SubRegInfo->getValue(); -} - static void initSlots2BasicBlocks( const Function &F, DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { @@ -2861,86 +2977,6 @@ 
const Value *MIParser::getIRValue(unsigned Slot) { return ValueInfo->second; } -void MIParser::initNames2TargetIndices() { - if (!Names2TargetIndices.empty()) - return; - const auto *TII = MF.getSubtarget().getInstrInfo(); - assert(TII && "Expected target instruction info"); - auto Indices = TII->getSerializableTargetIndices(); - for (const auto &I : Indices) - Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first)); -} - -bool MIParser::getTargetIndex(StringRef Name, int &Index) { - initNames2TargetIndices(); - auto IndexInfo = Names2TargetIndices.find(Name); - if (IndexInfo == Names2TargetIndices.end()) - return true; - Index = IndexInfo->second; - return false; -} - -void MIParser::initNames2DirectTargetFlags() { - if (!Names2DirectTargetFlags.empty()) - return; - const auto *TII = MF.getSubtarget().getInstrInfo(); - assert(TII && "Expected target instruction info"); - auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); - for (const auto &I : Flags) - Names2DirectTargetFlags.insert( - std::make_pair(StringRef(I.second), I.first)); -} - -bool MIParser::getDirectTargetFlag(StringRef Name, unsigned &Flag) { - initNames2DirectTargetFlags(); - auto FlagInfo = Names2DirectTargetFlags.find(Name); - if (FlagInfo == Names2DirectTargetFlags.end()) - return true; - Flag = FlagInfo->second; - return false; -} - -void MIParser::initNames2BitmaskTargetFlags() { - if (!Names2BitmaskTargetFlags.empty()) - return; - const auto *TII = MF.getSubtarget().getInstrInfo(); - assert(TII && "Expected target instruction info"); - auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags(); - for (const auto &I : Flags) - Names2BitmaskTargetFlags.insert( - std::make_pair(StringRef(I.second), I.first)); -} - -bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) { - initNames2BitmaskTargetFlags(); - auto FlagInfo = Names2BitmaskTargetFlags.find(Name); - if (FlagInfo == Names2BitmaskTargetFlags.end()) - return true; - Flag = FlagInfo->second; - return false; -} - -void MIParser::initNames2MMOTargetFlags() { - if (!Names2MMOTargetFlags.empty()) - return; - const auto *TII = MF.getSubtarget().getInstrInfo(); - assert(TII && "Expected target instruction info"); - auto Flags = TII->getSerializableMachineMemOperandTargetFlags(); - for (const auto &I : Flags) - Names2MMOTargetFlags.insert( - std::make_pair(StringRef(I.second), I.first)); -} - -bool MIParser::getMMOTargetFlag(StringRef Name, - MachineMemOperand::Flags &Flag) { - initNames2MMOTargetFlags(); - auto FlagInfo = Names2MMOTargetFlags.find(Name); - if (FlagInfo == Names2MMOTargetFlags.end()) - return true; - Flag = FlagInfo->second; - return false; -} - MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) { // FIXME: Currently we can't recognize temporary or local symbols and call all // of the appropriate forms to create them. However, this handles basic cases diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h deleted file mode 100644 index b06ceb21b740..000000000000 --- a/lib/CodeGen/MIRParser/MIParser.h +++ /dev/null @@ -1,125 +0,0 @@ -//===- MIParser.h - Machine Instructions Parser -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the function that parses the machine instructions. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H -#define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/Support/Allocator.h" - -namespace llvm { - -class MachineBasicBlock; -class MachineFunction; -class MDNode; -class RegisterBank; -struct SlotMapping; -class SMDiagnostic; -class SourceMgr; -class StringRef; -class TargetRegisterClass; - -struct VRegInfo { - enum uint8_t { - UNKNOWN, NORMAL, GENERIC, REGBANK - } Kind = UNKNOWN; - bool Explicit = false; ///< VReg was explicitly specified in the .mir file. - union { - const TargetRegisterClass *RC; - const RegisterBank *RegBank; - } D; - unsigned VReg; - unsigned PreferredReg = 0; -}; - -using Name2RegClassMap = StringMap<const TargetRegisterClass *>; -using Name2RegBankMap = StringMap<const RegisterBank *>; - -struct PerFunctionMIParsingState { - BumpPtrAllocator Allocator; - MachineFunction &MF; - SourceMgr *SM; - const SlotMapping &IRSlots; - const Name2RegClassMap &Names2RegClasses; - const Name2RegBankMap &Names2RegBanks; - - DenseMap<unsigned, MachineBasicBlock *> MBBSlots; - DenseMap<unsigned, VRegInfo*> VRegInfos; - StringMap<VRegInfo*> VRegInfosNamed; - DenseMap<unsigned, int> FixedStackObjectSlots; - DenseMap<unsigned, int> StackObjectSlots; - DenseMap<unsigned, unsigned> ConstantPoolSlots; - DenseMap<unsigned, unsigned> JumpTableSlots; - - PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM, - const SlotMapping &IRSlots, - const Name2RegClassMap &Names2RegClasses, - const Name2RegBankMap &Names2RegBanks); - - VRegInfo &getVRegInfo(unsigned Num); - VRegInfo &getVRegInfoNamed(StringRef RegName); -}; - -/// Parse the machine basic block definitions, and skip the machine -/// instructions. -/// -/// This function runs the first parsing pass on the machine function's body. -/// It parses only the machine basic block definitions and creates the machine -/// basic blocks in the given machine function. -/// -/// The machine instructions aren't parsed during the first pass because all -/// the machine basic blocks aren't defined yet - this makes it impossible to -/// resolve the machine basic block references. -/// -/// Return true if an error occurred. -bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS, - StringRef Src, SMDiagnostic &Error); - -/// Parse the machine instructions. -/// -/// This function runs the second parsing pass on the machine function's body. -/// It skips the machine basic block definitions and parses only the machine -/// instructions and basic block attributes like liveins and successors. -/// -/// The second parsing pass assumes that the first parsing pass already ran -/// on the given source string. -/// -/// Return true if an error occurred. 
-bool parseMachineInstructions(PerFunctionMIParsingState &PFS, StringRef Src, - SMDiagnostic &Error); - -bool parseMBBReference(PerFunctionMIParsingState &PFS, - MachineBasicBlock *&MBB, StringRef Src, - SMDiagnostic &Error); - -bool parseRegisterReference(PerFunctionMIParsingState &PFS, - unsigned &Reg, StringRef Src, - SMDiagnostic &Error); - -bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg, - StringRef Src, SMDiagnostic &Error); - -bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS, - VRegInfo *&Info, StringRef Src, - SMDiagnostic &Error); - -bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI, - StringRef Src, SMDiagnostic &Error); - -bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, - SMDiagnostic &Error); - -} // end namespace llvm - -#endif // LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index 00da92a92ec6..b242934def80 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -1,9 +1,8 @@ //===- MIRParser.cpp - MIR serialization format parser implementation -----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -13,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MIRParser/MIRParser.h" -#include "MIParser.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" @@ -22,12 +20,14 @@ #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" @@ -40,6 +40,7 @@ #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Target/TargetMachine.h" #include <memory> using namespace llvm; @@ -54,10 +55,8 @@ class MIRParserImpl { StringRef Filename; LLVMContext &Context; SlotMapping IRSlots; - /// Maps from register class names to register classes. - Name2RegClassMap Names2RegClasses; - /// Maps from register bank names to register banks. - Name2RegBankMap Names2RegBanks; + std::unique_ptr<PerTargetMIParsingState> Target; + /// True when the MIR file doesn't have LLVM IR. Dummy IR functions are /// created and inserted into the given module when this is true. 
bool NoLLVMIR = false; @@ -117,6 +116,9 @@ public: bool initializeFrameInfo(PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF); + bool initializeCallSiteInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF); + bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS, std::vector<CalleeSavedInfo> &CSIInfo, const yaml::StringValue &RegisterSource, @@ -151,20 +153,6 @@ private: SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error, SMRange SourceRange); - void initNames2RegClasses(const MachineFunction &MF); - void initNames2RegBanks(const MachineFunction &MF); - - /// Check if the given identifier is a name of a register class. - /// - /// Return null if the name isn't a register class. - const TargetRegisterClass *getRegClass(const MachineFunction &MF, - StringRef Name); - - /// Check if the given identifier is a name of a register bank. - /// - /// Return null if the name isn't a register bank. - const RegisterBank *getRegBank(const MachineFunction &MF, StringRef Name); - void computeFunctionProperties(MachineFunction &MF); }; @@ -271,8 +259,9 @@ bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { /// Create an empty function with the given name. static Function *createDummyFunction(StringRef Name, Module &M) { auto &Context = M.getContext(); - Function *F = cast<Function>(M.getOrInsertFunction( - Name, FunctionType::get(Type::getVoidTy(Context), false))); + Function *F = + Function::Create(FunctionType::get(Type::getVoidTy(Context), false), + Function::ExternalLinkage, Name, M); BasicBlock *BB = BasicBlock::Create(Context, "entry", F); new UnreachableInst(Context, BB); return F; @@ -282,6 +271,11 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) { // Parse the yaml. yaml::MachineFunction YamlMF; yaml::EmptyContext Ctx; + + const LLVMTargetMachine &TM = MMI.getTarget(); + YamlMF.MachineFuncInfo = std::unique_ptr<yaml::MachineFunctionInfo>( + TM.createDefaultFuncInfoYAML()); + yaml::yamlize(In, YamlMF, false, Ctx); if (In.error()) return true; @@ -346,12 +340,58 @@ void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) { Properties.set(MachineFunctionProperties::Property::NoVRegs); } +bool MIRParserImpl::initializeCallSiteInfo( + PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) { + MachineFunction &MF = PFS.MF; + SMDiagnostic Error; + const LLVMTargetMachine &TM = MF.getTarget(); + for (auto YamlCSInfo : YamlMF.CallSitesInfo) { + yaml::CallSiteInfo::MachineInstrLoc MILoc = YamlCSInfo.CallLocation; + if (MILoc.BlockNum >= MF.size()) + return error(Twine(MF.getName()) + + Twine(" call instruction block out of range.") + + " Unable to reference bb:" + Twine(MILoc.BlockNum)); + auto CallB = std::next(MF.begin(), MILoc.BlockNum); + if (MILoc.Offset >= CallB->size()) + return error(Twine(MF.getName()) + + Twine(" call instruction offset out of range.") + + "Unable to reference instruction at bb: " + + Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset)); + auto CallI = std::next(CallB->begin(), MILoc.Offset); + if (!CallI->isCall()) + return error(Twine(MF.getName()) + + Twine(" call site info should reference call " + "instruction. 
Instruction at bb:") + + Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset) + + " is not a call instruction"); + MachineFunction::CallSiteInfo CSInfo; + for (auto ArgRegPair : YamlCSInfo.ArgForwardingRegs) { + unsigned Reg = 0; + if (parseNamedRegisterReference(PFS, Reg, ArgRegPair.Reg.Value, Error)) + return error(Error, ArgRegPair.Reg.SourceRange); + CSInfo.emplace_back(Reg, ArgRegPair.ArgNo); + } + + if (TM.Options.EnableDebugEntryValues) + MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo)); + } + + if (YamlMF.CallSitesInfo.size() && !TM.Options.EnableDebugEntryValues) + return error(Twine("Call site info provided but not used")); + return false; +} + bool MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, MachineFunction &MF) { // TODO: Recreate the machine function. - initNames2RegClasses(MF); - initNames2RegBanks(MF); + if (Target) { + // Avoid clearing state if we're using the same subtarget again. + Target->setTarget(MF.getSubtarget()); + } else { + Target.reset(new PerTargetMIParsingState(MF.getSubtarget())); + } + if (YamlMF.Alignment) MF.setAlignment(YamlMF.Alignment); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); @@ -367,8 +407,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, if (YamlMF.FailedISel) MF.getProperties().set(MachineFunctionProperties::Property::FailedISel); - PerFunctionMIParsingState PFS(MF, SM, IRSlots, Names2RegClasses, - Names2RegBanks); + PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target); if (parseRegisterInfo(PFS, YamlMF)) return true; if (!YamlMF.Constants.empty()) { @@ -419,8 +458,32 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, if (setupRegisterInfo(PFS, YamlMF)) return true; + if (YamlMF.MachineFuncInfo) { + const LLVMTargetMachine &TM = MF.getTarget(); + // Note this is called after the initial constructor of the + // MachineFunctionInfo based on the MachineFunction, which may depend on the + // IR. + + SMRange SrcRange; + if (TM.parseMachineFunctionInfo(*YamlMF.MachineFuncInfo, PFS, Error, + SrcRange)) { + return error(Error, SrcRange); + } + } + + // Set the reserved registers after parsing MachineFuncInfo. The target may + // have been recording information used to select the reserved registers + // there. + // FIXME: This is a temporary workaround until the reserved registers can be + // serialized. + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.freezeReservedRegs(MF); + computeFunctionProperties(MF); + if (initializeCallSiteInfo(PFS, YamlMF)) + return false; + MF.getSubtarget().mirFileLoaded(MF); MF.verify(); @@ -449,12 +512,12 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS, Info.Kind = VRegInfo::GENERIC; Info.D.RegBank = nullptr; } else { - const auto *RC = getRegClass(MF, VReg.Class.Value); + const auto *RC = Target->getRegClass(VReg.Class.Value); if (RC) { Info.Kind = VRegInfo::NORMAL; Info.D.RC = RC; } else { - const RegisterBank *RegBank = getRegBank(MF, VReg.Class.Value); + const RegisterBank *RegBank = Target->getRegBank(VReg.Class.Value); if (!RegBank) return error( VReg.Class.SourceRange.Start, @@ -557,9 +620,6 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS, } } - // FIXME: This is a temporary workaround until the reserved registers can be - // serialized. 
- MRI.freezeReservedRegs(MF); return Error; } @@ -567,6 +627,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) { MachineFunction &MF = PFS.MF; MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const Function &F = MF.getFunction(); const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); @@ -608,8 +669,12 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, Object.IsImmutable, Object.IsAliased); else ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset); - MFI.setObjectAlignment(ObjectIdx, Object.Alignment); + + if (!TFI->isSupportedStackID(Object.StackID)) + return error(Object.ID.SourceRange.Start, + Twine("StackID is not supported by target")); MFI.setStackID(ObjectIdx, Object.StackID); + MFI.setObjectAlignment(ObjectIdx, Object.Alignment); if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx)) .second) @@ -637,14 +702,17 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, "' isn't defined in the function '" + F.getName() + "'"); } + if (!TFI->isSupportedStackID(Object.StackID)) + return error(Object.ID.SourceRange.Start, + Twine("StackID is not supported by target")); if (Object.Type == yaml::MachineStackObject::VariableSized) ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca); else ObjectIdx = MFI.CreateStackObject( Object.Size, Object.Alignment, - Object.Type == yaml::MachineStackObject::SpillSlot, Alloca); + Object.Type == yaml::MachineStackObject::SpillSlot, Alloca, + Object.StackID); MFI.setObjectOffset(ObjectIdx, Object.Offset); - MFI.setStackID(ObjectIdx, Object.StackID); if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx)) .second) @@ -844,48 +912,6 @@ SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error, Error.getFixIts()); } -void MIRParserImpl::initNames2RegClasses(const MachineFunction &MF) { - if (!Names2RegClasses.empty()) - return; - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) { - const auto *RC = TRI->getRegClass(I); - Names2RegClasses.insert( - std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC)); - } -} - -void MIRParserImpl::initNames2RegBanks(const MachineFunction &MF) { - if (!Names2RegBanks.empty()) - return; - const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo(); - // If the target does not support GlobalISel, we may not have a - // register bank info. 
- if (!RBI) - return; - for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) { - const auto &RegBank = RBI->getRegBank(I); - Names2RegBanks.insert( - std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank)); - } -} - -const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF, - StringRef Name) { - auto RegClassInfo = Names2RegClasses.find(Name); - if (RegClassInfo == Names2RegClasses.end()) - return nullptr; - return RegClassInfo->getValue(); -} - -const RegisterBank *MIRParserImpl::getRegBank(const MachineFunction &MF, - StringRef Name) { - auto RegBankInfo = Names2RegBanks.find(Name); - if (RegBankInfo == Names2RegBanks.end()) - return nullptr; - return RegBankInfo->getValue(); -} - MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl) : Impl(std::move(Impl)) {} diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index d9dcc428943f..0a95a0ced0f5 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -1,9 +1,8 @@ //===- MIRPrinter.cpp - MIR serialization format printer ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -36,6 +35,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -129,6 +129,9 @@ public: const MachineJumpTableInfo &JTI); void convertStackObjects(yaml::MachineFunction &YMF, const MachineFunction &MF, ModuleSlotTracker &MST); + void convertCallSiteObjects(yaml::MachineFunction &YMF, + const MachineFunction &MF, + ModuleSlotTracker &MST); private: void initRegisterMaskIds(const MachineFunction &MF); @@ -212,10 +215,16 @@ void MIRPrinter::print(const MachineFunction &MF) { MST.incorporateFunction(MF.getFunction()); convert(MST, YamlMF.FrameInfo, MF.getFrameInfo()); convertStackObjects(YamlMF, MF, MST); + convertCallSiteObjects(YamlMF, MF, MST); if (const auto *ConstantPool = MF.getConstantPool()) convert(YamlMF, *ConstantPool); if (const auto *JumpTableInfo = MF.getJumpTableInfo()) convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo); + + const TargetMachine &TM = MF.getTarget(); + YamlMF.MachineFuncInfo = + std::unique_ptr<yaml::MachineFunctionInfo>(TM.convertFuncInfoToYAML(MF)); + raw_string_ostream StrOS(YamlMF.Body.Value.Value); bool IsNewlineNeeded = false; for (const auto &MBB : MF) { @@ -352,7 +361,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); // Process fixed stack objects. 
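The hunk below moves the ID increment into the loop header (++I, ++ID) and drops the post-increment at the use site. As far as the diff shows, the effect is that dead stack objects still consume an ID, keeping the printed IDs aligned with frame-index order instead of compacting them around the skipped slots. A toy rendering of the difference:

    #include <cstdio>

    int main() {
      bool Dead[] = {false, true, false};
      // With ++ID in the loop header, the third object keeps ID 2 even though
      // object 1 is dead; with ID++ at the use site it would have printed 1.
      unsigned ID = 0;
      for (unsigned I = 0; I < 3; ++I, ++ID) {
        if (Dead[I])
          continue;
        std::printf("object %u -> ID %u\n", I, ID);
      }
    }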
unsigned ID = 0; - for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { + for (int I = MFI.getObjectIndexBegin(); I < 0; ++I, ++ID) { if (MFI.isDeadObjectIndex(I)) continue; @@ -364,17 +373,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, YamlObject.Offset = MFI.getObjectOffset(I); YamlObject.Size = MFI.getObjectSize(I); YamlObject.Alignment = MFI.getObjectAlignment(I); - YamlObject.StackID = MFI.getStackID(I); + YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I); YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I); YamlObject.IsAliased = MFI.isAliasedObjectIndex(I); YMF.FixedStackObjects.push_back(YamlObject); StackObjectOperandMapping.insert( - std::make_pair(I, FrameIndexOperand::createFixed(ID++))); + std::make_pair(I, FrameIndexOperand::createFixed(ID))); } // Process ordinary stack objects. ID = 0; - for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I) { + for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I, ++ID) { if (MFI.isDeadObjectIndex(I)) continue; @@ -391,14 +400,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, YamlObject.Offset = MFI.getObjectOffset(I); YamlObject.Size = MFI.getObjectSize(I); YamlObject.Alignment = MFI.getObjectAlignment(I); - YamlObject.StackID = MFI.getStackID(I); + YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I); YMF.StackObjects.push_back(YamlObject); StackObjectOperandMapping.insert(std::make_pair( - I, FrameIndexOperand::create(YamlObject.Name.Value, ID++))); + I, FrameIndexOperand::create(YamlObject.Name.Value, ID))); } for (const auto &CSInfo : MFI.getCalleeSavedInfo()) { + if (!CSInfo.isSpilledToReg() && MFI.isDeadObjectIndex(CSInfo.getFrameIdx())) + continue; + yaml::StringValue Reg; printRegMIR(CSInfo.getReg(), Reg, TRI); if (!CSInfo.isSpilledToReg()) { @@ -452,6 +464,39 @@ } } +void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF, + const MachineFunction &MF, + ModuleSlotTracker &MST) { + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + for (auto CSInfo : MF.getCallSitesInfo()) { + yaml::CallSiteInfo YmlCS; + yaml::CallSiteInfo::MachineInstrLoc CallLocation; + + // Prepare instruction position. + MachineBasicBlock::const_iterator CallI = CSInfo.first->getIterator(); + CallLocation.BlockNum = CallI->getParent()->getNumber(); + // Get the call instruction offset from the beginning of the block. + CallLocation.Offset = std::distance(CallI->getParent()->begin(), CallI); + YmlCS.CallLocation = CallLocation; + // Construct call arguments and their forwarding register info. + for (auto ArgReg : CSInfo.second) { + yaml::CallSiteInfo::ArgRegPair YmlArgReg; + YmlArgReg.ArgNo = ArgReg.ArgNo; + printRegMIR(ArgReg.Reg, YmlArgReg.Reg, TRI); + YmlCS.ArgForwardingRegs.emplace_back(YmlArgReg); + } + YMF.CallSitesInfo.push_back(YmlCS); + } + + // Sort call info by position of call instructions.
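The llvm::sort that follows is what makes the printed call-site list stable: MF.getCallSitesInfo() hands back a map keyed by instruction pointers, so the loop above collects entries in an effectively arbitrary order, and ordering by (block number, offset) pins the output for a given function. The same collect-then-sort shape in miniature:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Loc { unsigned Block, Offset; };

    int main() {
      // Pretend these came out of a pointer-keyed map in arbitrary order.
      std::vector<Loc> Sites = {{2, 0}, {0, 3}, {0, 1}};
      std::sort(Sites.begin(), Sites.end(), [](const Loc &A, const Loc &B) {
        if (A.Block == B.Block)
          return A.Offset < B.Offset;
        return A.Block < B.Block;
      });
      for (const Loc &S : Sites)
        std::printf("bb.%u, offset %u\n", S.Block, S.Offset); // 0/1, 0/3, 2/0
    }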
+ llvm::sort(YMF.CallSitesInfo.begin(), YMF.CallSitesInfo.end(), + [](yaml::CallSiteInfo A, yaml::CallSiteInfo B) { + if (A.CallLocation.BlockNum == B.CallLocation.BlockNum) + return A.CallLocation.Offset < B.CallLocation.Offset; + return A.CallLocation.BlockNum < B.CallLocation.BlockNum; + }); +} + void MIRPrinter::convert(yaml::MachineFunction &MF, const MachineConstantPool &ConstantPool) { unsigned ID = 0; @@ -706,6 +751,8 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "nsw "; if (MI.getFlag(MachineInstr::IsExact)) OS << "exact "; + if (MI.getFlag(MachineInstr::FPExcept)) + OS << "fpexcept "; OS << TII->getName(MI.getOpcode()); if (I < E) diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp index 1a8427430ea0..e032fffd658c 100644 --- a/lib/CodeGen/MIRPrintingPass.cpp +++ b/lib/CodeGen/MIRPrintingPass.cpp @@ -1,9 +1,8 @@ //===- MIRPrintingPass.cpp - Pass that prints out using the MIR format ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 03771bc5dae1..4d29e883d879 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -133,8 +132,12 @@ void ilist_traits<MachineInstr>::transferNodesFromList(ilist_traits &FromList, instr_iterator First, instr_iterator Last) { assert(Parent->getParent() == FromList.Parent->getParent() && - "MachineInstr parent mismatch!"); - assert(this != &FromList && "Called without a real transfer..."); + "cannot transfer MachineInstrs between MachineFunctions"); + + // If it's within the same BB, there's nothing to do. 
+ if (this == &FromList) + return; + assert(Parent != FromList.Parent && "Two lists have the same parent?"); // If splicing between two blocks within the same function, just update the @@ -995,7 +998,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, while (!KilledRegs.empty()) { unsigned Reg = KilledRegs.pop_back_val(); for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) { - if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) + if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false)) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) LV->getVarInfo(Reg).Kills.push_back(&*I); diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 3459a9f71a73..53a35b7e89c2 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -1,9 +1,8 @@ //===- MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 4fee9c4ea027..639b588766a1 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -1,9 +1,8 @@ //===- MachineBlockPlacement.cpp - Basic Block Code Layout optimization ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -452,15 +451,28 @@ class MachineBlockPlacement : public MachineFunctionPass { void buildChain(const MachineBasicBlock *BB, BlockChain &Chain, BlockFilterSet *BlockFilter = nullptr); + bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock, + const MachineBasicBlock *OldTop); + bool hasViableTopFallthrough(const MachineBasicBlock *Top, + const BlockFilterSet &LoopBlockSet); + BlockFrequency TopFallThroughFreq(const MachineBasicBlock *Top, + const BlockFilterSet &LoopBlockSet); + BlockFrequency FallThroughGains(const MachineBasicBlock *NewTop, + const MachineBasicBlock *OldTop, + const MachineBasicBlock *ExitBB, + const BlockFilterSet &LoopBlockSet); + MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop, + const MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopTop( const MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit( - const MachineLoop &L, const BlockFilterSet &LoopBlockSet); + const MachineLoop &L, const BlockFilterSet &LoopBlockSet, + BlockFrequency &ExitFreq); BlockFilterSet collectLoopBlockSet(const MachineLoop &L); void buildLoopChains(const MachineLoop &L); void rotateLoop( BlockChain &LoopChain, const MachineBasicBlock *ExitingBB, - const BlockFilterSet &LoopBlockSet); + BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet); void rotateLoopWithProfile( BlockChain &LoopChain, const MachineLoop &L, const BlockFilterSet &LoopBlockSet); @@ -938,8 +950,8 @@ MachineBlockPlacement::getBestNonConflictingEdges( // Sort for highest frequency. auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; }; - std::stable_sort(Edges[0].begin(), Edges[0].end(), Cmp); - std::stable_sort(Edges[1].begin(), Edges[1].end(), Cmp); + llvm::stable_sort(Edges[0], Cmp); + llvm::stable_sort(Edges[1], Cmp); auto BestA = Edges[0].begin(); auto BestB = Edges[1].begin(); // Arrange for the correct answer to be in BestA and BestB @@ -1527,15 +1539,12 @@ MachineBlockPlacement::selectBestSuccessor( // profitable than BestSucc. Position is important because we preserve it and // prefer first best match. Here we aren't comparing in order, so we capture // the position instead. - if (DupCandidates.size() != 0) { - auto cmp = - [](const std::tuple<BranchProbability, MachineBasicBlock *> &a, - const std::tuple<BranchProbability, MachineBasicBlock *> &b) { - return std::get<0>(a) > std::get<0>(b); - }; - std::stable_sort(DupCandidates.begin(), DupCandidates.end(), cmp); - } - for(auto &Tup : DupCandidates) { + llvm::stable_sort(DupCandidates, + [](std::tuple<BranchProbability, MachineBasicBlock *> L, + std::tuple<BranchProbability, MachineBasicBlock *> R) { + return std::get<0>(L) > std::get<0>(R); + }); + for (auto &Tup : DupCandidates) { BranchProbability DupProb; MachineBasicBlock *Succ; std::tie(DupProb, Succ) = Tup; @@ -1757,63 +1766,238 @@ void MachineBlockPlacement::buildChain( << getBlockName(*Chain.begin()) << "\n"); } -/// Find the best loop top block for layout. +// If bottom of block BB has only one successor OldTop, in most cases it is +// profitable to move it before OldTop, except the following case: +// +// -->OldTop<- +// | . | +// | . | +// | . 
| + ---Pred | + | | + BB----- + + // If BB is moved before OldTop, Pred needs a taken branch to BB, and it + // can't lay out the other successor below it, so it can't reduce the number + // of taken branches. In this case we keep its original layout. +bool +MachineBlockPlacement::canMoveBottomBlockToTop( + const MachineBasicBlock *BottomBlock, + const MachineBasicBlock *OldTop) { + if (BottomBlock->pred_size() != 1) + return true; + MachineBasicBlock *Pred = *BottomBlock->pred_begin(); + if (Pred->succ_size() != 2) + return true; + + MachineBasicBlock *OtherBB = *Pred->succ_begin(); + if (OtherBB == BottomBlock) + OtherBB = *Pred->succ_rbegin(); + if (OtherBB == OldTop) + return false; + + return true; +} + +// Find out the possible fallthrough frequency to the top of a loop. +BlockFrequency +MachineBlockPlacement::TopFallThroughFreq( + const MachineBasicBlock *Top, + const BlockFilterSet &LoopBlockSet) { + BlockFrequency MaxFreq = 0; + for (MachineBasicBlock *Pred : Top->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + if (!LoopBlockSet.count(Pred) && + (!PredChain || Pred == *std::prev(PredChain->end()))) { + // Found a Pred block that can be placed before Top. + // Check if Top is the best successor of Pred. + auto TopProb = MBPI->getEdgeProbability(Pred, Top); + bool TopOK = true; + for (MachineBasicBlock *Succ : Pred->successors()) { + auto SuccProb = MBPI->getEdgeProbability(Pred, Succ); + BlockChain *SuccChain = BlockToChain[Succ]; + // Check if Succ can be placed after Pred. + // Succ should not be in any chain, or it is the head of some chain. + if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) && + (!SuccChain || Succ == *SuccChain->begin())) { + TopOK = false; + break; + } + } + if (TopOK) { + BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) * + MBPI->getEdgeProbability(Pred, Top); + if (EdgeFreq > MaxFreq) + MaxFreq = EdgeFreq; + } + } + } + return MaxFreq; +} + +// Compute the fallthrough gains from moving NewTop before OldTop. +// +// In the following diagram, edges marked "-" are reduced fallthrough and edges +// marked "+" are increased fallthrough; this function computes +// +// SUM(increased fallthrough) - SUM(decreased fallthrough) +// +// | +// | - +// V +// --->OldTop +// | . +// | . +// +| . + +// | Pred ---> +// | |- +// | V +// --- NewTop <--- +// |- +// V +// +BlockFrequency +MachineBlockPlacement::FallThroughGains( + const MachineBasicBlock *NewTop, + const MachineBasicBlock *OldTop, + const MachineBasicBlock *ExitBB, + const BlockFilterSet &LoopBlockSet) { + BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet); + BlockFrequency FallThrough2Exit = 0; + if (ExitBB) + FallThrough2Exit = MBFI->getBlockFreq(NewTop) * + MBPI->getEdgeProbability(NewTop, ExitBB); + BlockFrequency BackEdgeFreq = MBFI->getBlockFreq(NewTop) * + MBPI->getEdgeProbability(NewTop, OldTop); + + // Find the best Pred of NewTop. + MachineBasicBlock *BestPred = nullptr; + BlockFrequency FallThroughFromPred = 0; + for (MachineBasicBlock *Pred : NewTop->predecessors()) { + if (!LoopBlockSet.count(Pred)) + continue; + BlockChain *PredChain = BlockToChain[Pred]; + if (!PredChain || Pred == *std::prev(PredChain->end())) { + BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) * + MBPI->getEdgeProbability(Pred, NewTop); + if (EdgeFreq > FallThroughFromPred) { + FallThroughFromPred = EdgeFreq; + BestPred = Pred; + } + } + } + + // If NewTop is not placed after Pred, another successor can be placed + // after Pred.
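+ // NewFreq below estimates the hottest alternative edge out of BestPred,
+ // i.e. the fallthrough BestPred could get from another successor if NewTop
+ // is moved away from it.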
+ BlockFrequency NewFreq = 0; + if (BestPred) { + for (MachineBasicBlock *Succ : BestPred->successors()) { + if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ)) + continue; + if (ComputedEdges.find(Succ) != ComputedEdges.end()) + continue; + BlockChain *SuccChain = BlockToChain[Succ]; + if ((SuccChain && (Succ != *SuccChain->begin())) || + (SuccChain == BlockToChain[BestPred])) + continue; + BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) * + MBPI->getEdgeProbability(BestPred, Succ); + if (EdgeFreq > NewFreq) + NewFreq = EdgeFreq; + } + BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) * + MBPI->getEdgeProbability(BestPred, NewTop); + if (NewFreq > OrigEdgeFreq) { + // If NewTop is not the best successor of Pred, then Pred doesn't + // fall through to NewTop, so there is no FallThroughFromPred and no + // NewFreq. + NewFreq = 0; + FallThroughFromPred = 0; + } + } + + BlockFrequency Result = 0; + BlockFrequency Gains = BackEdgeFreq + NewFreq; + BlockFrequency Lost = FallThrough2Top + FallThrough2Exit + + FallThroughFromPred; + if (Gains > Lost) + Result = Gains - Lost; + return Result; +} + +/// Helper function of findBestLoopTop. Find the best loop top block +/// from the predecessors of the old top. +/// +/// Look for a block which is strictly better than the old top for laying +/// out before the old top of the loop. This looks for only two patterns: +/// +/// 1. a block has only one successor, the old loop top +/// +/// Because such a block will always result in an unconditional jump, +/// rotating it in front of the old top is always profitable. +/// +/// 2. a block has two successors, one the old top, the other an exit, +/// and it has more than one predecessor /// -/// Look for a block which is strictly better than the loop header for laying -/// out at the top of the loop. This looks for one and only one pattern: -/// a latch block with no conditional exit. This block will cause a conditional -/// jump around it or will be the bottom of the loop if we lay it out in place, -/// but if it it doesn't end up at the bottom of the loop for any reason, -/// rotation alone won't fix it. Because such a block will always result in an -/// unconditional jump (for the backedge) rotating it in front of the loop -/// header is always profitable. +/// If it is below one of its predecessors P, only P can fall through to +/// it; all other predecessors need a jump to it, plus another conditional +/// jump to the loop header. If it is moved before the loop header, all its +/// predecessors jump to it and then fall through to the loop header. So all +/// its predecessors except P save one taken branch. +/// At the same time, moving it before the old top increases the taken +/// branches to the loop exit block, so the reduced taken branches are +/// weighed against the increased taken branches to the loop exit block. MachineBasicBlock * -MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, - const BlockFilterSet &LoopBlockSet) { - // Placing the latch block before the header may introduce an extra branch - // that skips this block the first time the loop is executed, which we want - // to avoid when optimising for size. - // FIXME: in theory there is a case that does not introduce a new branch, - // i.e. when the layout predecessor does not fallthrough to the loop header. - // In practice this never happens though: there always seems to be a preheader - // that can fallthrough and that is also placed before the header.
- if (F->getFunction().optForSize()) - return L.getHeader(); - +MachineBlockPlacement::findBestLoopTopHelper( + MachineBasicBlock *OldTop, + const MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { // Check that the header hasn't been fused with a preheader block due to // crazy branches. If it has, we need to start with the header at the top to // prevent pulling the preheader into the loop body. - BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + BlockChain &HeaderChain = *BlockToChain[OldTop]; if (!LoopBlockSet.count(*HeaderChain.begin())) - return L.getHeader(); + return OldTop; - LLVM_DEBUG(dbgs() << "Finding best loop top for: " - << getBlockName(L.getHeader()) << "\n"); + LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop) + << "\n"); - BlockFrequency BestPredFreq; + BlockFrequency BestGains = 0; MachineBasicBlock *BestPred = nullptr; - for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) { + for (MachineBasicBlock *Pred : OldTop->predecessors()) { if (!LoopBlockSet.count(Pred)) continue; - LLVM_DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has " + if (Pred == L.getHeader()) + continue; + LLVM_DEBUG(dbgs() << " old top pred: " << getBlockName(Pred) << ", has " << Pred->succ_size() << " successors, "; MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); - if (Pred->succ_size() > 1) + if (Pred->succ_size() > 2) continue; - BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); - if (!BestPred || PredFreq > BestPredFreq || - (!(PredFreq < BestPredFreq) && - Pred->isLayoutSuccessor(L.getHeader()))) { + MachineBasicBlock *OtherBB = nullptr; + if (Pred->succ_size() == 2) { + OtherBB = *Pred->succ_begin(); + if (OtherBB == OldTop) + OtherBB = *Pred->succ_rbegin(); + } + + if (!canMoveBottomBlockToTop(Pred, OldTop)) + continue; + + BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB, + LoopBlockSet); + if ((Gains > 0) && (Gains > BestGains || + ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) { BestPred = Pred; - BestPredFreq = PredFreq; + BestGains = Gains; } } // If no direct predecessor is fine, just use the loop header. if (!BestPred) { LLVM_DEBUG(dbgs() << " final top unchanged\n"); - return L.getHeader(); + return OldTop; } // Walk backwards through any straight line of predecessors. @@ -1826,6 +2010,34 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, return BestPred; } +/// Find the best loop top block for layout. +/// +/// This function iteratively calls findBestLoopTopHelper, until no new better +/// BB can be found. +MachineBasicBlock * +MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // Placing the latch block before the header may introduce an extra branch + // that skips this block the first time the loop is executed, which we want + // to avoid when optimising for size. + // FIXME: in theory there is a case that does not introduce a new branch, + // i.e. when the layout predecessor does not fallthrough to the loop header. + // In practice this never happens though: there always seems to be a preheader + // that can fallthrough and that is also placed before the header. 
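+ // (When not optimizing for size, the payoff is concrete: a latch whose only
+ // successor is the header trades its unconditional backedge jump for a
+ // fallthrough once it is laid out in front of the header.)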
+ if (F->getFunction().hasOptSize()) + return L.getHeader(); + + MachineBasicBlock *OldTop = nullptr; + MachineBasicBlock *NewTop = L.getHeader(); + while (NewTop != OldTop) { + OldTop = NewTop; + NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet); + if (NewTop != OldTop) + ComputedEdges[NewTop] = { OldTop, false }; + } + return NewTop; +} + /// Find the best loop exiting block for layout. /// /// This routine implements the logic to analyze the loop looking for the best @@ -1833,7 +2045,8 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, /// fallthrough opportunities. MachineBasicBlock * MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, - const BlockFilterSet &LoopBlockSet) { + const BlockFilterSet &LoopBlockSet, + BlockFrequency &ExitFreq) { // We don't want to layout the loop linearly in all cases. If the loop header // is just a normal basic block in the loop, we want to look for what block // within the loop is the best one to layout at the top. However, if the loop @@ -1944,9 +2157,43 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, LLVM_DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); + ExitFreq = BestExitEdgeFreq; return ExitingBB; } +/// Check if there is a fallthrough to the loop header Top. +/// +/// 1. Look for a Pred that can be laid out before Top. +/// 2. Check if Top is the most probable successor of Pred. +bool +MachineBlockPlacement::hasViableTopFallthrough( + const MachineBasicBlock *Top, + const BlockFilterSet &LoopBlockSet) { + for (MachineBasicBlock *Pred : Top->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + if (!LoopBlockSet.count(Pred) && + (!PredChain || Pred == *std::prev(PredChain->end()))) { + // Found a Pred block that can be placed before Top. + // Check if Top is the best successor of Pred. + auto TopProb = MBPI->getEdgeProbability(Pred, Top); + bool TopOK = true; + for (MachineBasicBlock *Succ : Pred->successors()) { + auto SuccProb = MBPI->getEdgeProbability(Pred, Succ); + BlockChain *SuccChain = BlockToChain[Succ]; + // Check if Succ can be placed after Pred. + // Succ should not be in any chain, or it is the head of some chain. + if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) { + TopOK = false; + break; + } + } + if (TopOK) + return true; + } + } + return false; +} + /// Attempt to rotate an exiting block to the bottom of the loop. /// /// Once we have built a chain, try to rotate it to line up the hot exit block @@ -1955,6 +2202,7 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, /// of its bottom already, don't rotate it. void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, const MachineBasicBlock *ExitingBB, + BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet) { if (!ExitingBB) return; @@ -1966,15 +2214,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, if (Bottom == ExitingBB) return; - bool ViableTopFallthrough = false; - for (MachineBasicBlock *Pred : Top->predecessors()) { - BlockChain *PredChain = BlockToChain[Pred]; - if (!LoopBlockSet.count(Pred) && - (!PredChain || Pred == *std::prev(PredChain->end()))) { - ViableTopFallthrough = true; - break; - } - } + bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet); // If the header has viable fallthrough, check whether the current loop // bottom is a viable exiting block.
If so, bail out as rotating will @@ -1986,6 +2226,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, (!SuccChain || Succ == *SuccChain->begin())) return; } + + // Rotation will destroy the top fallthrough; we need to ensure that the new + // exit frequency is larger than the top fallthrough frequency. + BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet); + if (FallThrough2Top >= ExitFreq) + return; } BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB); @@ -2041,8 +2287,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, void MachineBlockPlacement::rotateLoopWithProfile( BlockChain &LoopChain, const MachineLoop &L, const BlockFilterSet &LoopBlockSet) { - auto HeaderBB = L.getHeader(); - auto HeaderIter = llvm::find(LoopChain, HeaderBB); auto RotationPos = LoopChain.end(); BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); @@ -2062,12 +2306,13 @@ void MachineBlockPlacement::rotateLoopWithProfile( // chain head is not the loop header. As we only consider natural loops with // single header, this computation can be done only once. BlockFrequency HeaderFallThroughCost(0); - for (auto *Pred : HeaderBB->predecessors()) { + MachineBasicBlock *ChainHeaderBB = *LoopChain.begin(); + for (auto *Pred : ChainHeaderBB->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (!LoopBlockSet.count(Pred) && (!PredChain || Pred == *std::prev(PredChain->end()))) { - auto EdgeFreq = - MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB); + auto EdgeFreq = MBFI->getBlockFreq(Pred) * + MBPI->getEdgeProbability(Pred, ChainHeaderBB); auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost); // If the predecessor has only an unconditional jump to the header, we // need to consider the cost of this jump. @@ -2117,7 +2362,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( // If the current BB is the loop header, we need to take into account the // cost of the missed fall through edge from outside of the loop to the // header. - if (Iter != HeaderIter) + if (Iter != LoopChain.begin()) Cost += HeaderFallThroughCost; // Collect the loop exit cost by summing up frequencies of all exit edges @@ -2238,9 +2483,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // loop. This will default to the header, but may end up as one of the // predecessors to the header if there is one which will result in strictly // fewer branches in the loop body. - // When we use profile data to rotate the loop, this is unnecessary. - MachineBasicBlock *LoopTop = - RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet); + MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet); // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate @@ -2249,8 +2492,9 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // Loops are processed innermost to outermost, make sure we clear // PreferredLoopExit before processing a new loop.
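+ // ExitFreq is filled in by findBestLoopExit below and handed to rotateLoop,
+ // which weighs it against the top fallthrough frequency that rotation would
+ // destroy.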
PreferredLoopExit = nullptr; + BlockFrequency ExitFreq; if (!RotateLoopWithProfile && LoopTop == L.getHeader()) - PreferredLoopExit = findBestLoopExit(L, LoopBlockSet); + PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq); BlockChain &LoopChain = *BlockToChain[LoopTop]; @@ -2270,7 +2514,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { if (RotateLoopWithProfile) rotateLoopWithProfile(LoopChain, L, LoopBlockSet); else - rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet); + rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet); LLVM_DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -2497,8 +2741,8 @@ void MachineBlockPlacement::alignBlocks() { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F->getFunction().optForMinSize() || - (F->getFunction().optForSize() && !TLI->alignLoopsWithOptSize())) + if (F->getFunction().hasMinSize() || + (F->getFunction().hasOptSize() && !TLI->alignLoopsWithOptSize())) return; BlockChain &FunctionChain = *BlockToChain[&F->front()]; if (FunctionChain.begin() == FunctionChain.end()) @@ -2773,7 +3017,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (allowTailDupPlacement()) { MPDT = &getAnalysis<MachinePostDominatorTree>(); - if (MF.getFunction().optForSize()) + if (MF.getFunction().hasOptSize()) TailDupSize = 1; bool PreRegAlloc = false; TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize); @@ -2796,7 +3040,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>(), MLI, - /*AfterBlockPlacement=*/true)) { + /*AfterPlacement=*/true)) { // Redo the layout if tail merging creates/removes/moves blocks. BlockToChain.clear(); ComputedEdges.clear(); diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index e4952aaaba06..d2277ce51746 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -1,9 +1,8 @@ //===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 6ee8571c28aa..2df6d40d9293 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -1,9 +1,8 @@ //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -20,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -50,6 +50,8 @@ using namespace llvm; STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); +STATISTIC(NumPREs, "Number of partially redundant expressions" + " transformed to fully redundant"); STATISTIC(NumPhysCSEs, "Number of physreg referencing common subexpr eliminated"); STATISTIC(NumCrossBBCSEs, @@ -85,6 +87,7 @@ namespace { void releaseMemory() override { ScopeMap.clear(); + PREMap.clear(); Exps.clear(); } @@ -95,9 +98,12 @@ namespace { ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait, AllocatorTy>; using ScopeType = ScopedHTType::ScopeTy; + using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>; unsigned LookAheadLimit = 0; DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap; + DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait> + PREMap; ScopedHTType VNT; SmallVector<MachineInstr *, 64> Exps; unsigned CurrVN = 0; @@ -109,22 +115,24 @@ namespace { MachineBasicBlock::const_iterator E) const; bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, - SmallSet<unsigned,8> &PhysRefs, - SmallVectorImpl<unsigned> &PhysDefs, - bool &PhysUseDef) const; + SmallSet<unsigned, 8> &PhysRefs, + PhysDefVector &PhysDefs, bool &PhysUseDef) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet<unsigned,8> &PhysRefs, - SmallVectorImpl<unsigned> &PhysDefs, - bool &NonLocal) const; + SmallSet<unsigned, 8> &PhysRefs, + PhysDefVector &PhysDefs, bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, - MachineInstr *CSMI, MachineInstr *MI); + MachineBasicBlock *CSBB, MachineInstr *MI); void EnterScope(MachineBasicBlock *MBB); void ExitScope(MachineBasicBlock *MBB); - bool ProcessBlock(MachineBasicBlock *MBB); + bool ProcessBlockCSE(MachineBasicBlock *MBB); void ExitScopeIfDone(MachineDomTreeNode *Node, DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); + + bool isPRECandidate(MachineInstr *MI); + bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); + bool PerformSimplePRE(MachineDominatorTree *DT); }; } // end anonymous namespace @@ -256,9 +264,9 @@ static bool isCallerPreservedOrConstPhysReg(unsigned Reg, /// instruction does not use a physical register. bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, - SmallSet<unsigned,8> &PhysRefs, - SmallVectorImpl<unsigned> &PhysDefs, - bool &PhysUseDef) const{ + SmallSet<unsigned, 8> &PhysRefs, + PhysDefVector &PhysDefs, + bool &PhysUseDef) const { // First, add all uses to PhysRefs. for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isDef()) continue; @@ -278,7 +286,8 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, // (which currently contains only uses), set the PhysUseDef flag.
PhysUseDef = false; MachineBasicBlock::const_iterator I = MI; I = std::next(I); - for (const MachineOperand &MO : MI->operands()) { + for (const auto &MOP : llvm::enumerate(MI->operands())) { + const MachineOperand &MO = MOP.value(); if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); @@ -293,20 +302,21 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, // common since this pass is run before livevariables. We can scan // forward a few instructions and check if it is obviously dead. if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end())) - PhysDefs.push_back(Reg); + PhysDefs.push_back(std::make_pair(MOP.index(), Reg)); } // Finally, add all defs to PhysRefs as well. for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) - for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI) + for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid(); + ++AI) PhysRefs.insert(*AI); return !PhysRefs.empty(); } bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet<unsigned,8> &PhysRefs, - SmallVectorImpl<unsigned> &PhysDefs, + SmallSet<unsigned, 8> &PhysRefs, + PhysDefVector &PhysDefs, bool &NonLocal) const { // For now conservatively returns false if the common subexpression is // not in the same basic block as the given instruction. The only exception @@ -320,7 +330,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, return false; for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { - if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i])) + if (MRI->isAllocatable(PhysDefs[i].second) || + MRI->isReserved(PhysDefs[i].second)) // Avoid extending live range of physical registers if they are //allocatable or reserved. return false; @@ -381,7 +392,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { // Ignore stuff that we obviously can't move. if (MI->mayStore() || MI->isCall() || MI->isTerminator() || - MI->hasUnmodeledSideEffects()) + MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects()) return false; if (MI->mayLoad()) { @@ -404,9 +415,10 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { } /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a -/// common expression that defines Reg. +/// common expression that defines Reg. CSBB is basic block where CSReg is +/// defined. bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, - MachineInstr *CSMI, MachineInstr *MI) { + MachineBasicBlock *CSBB, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. // If CSReg is used at all uses of Reg, CSE should not increase register @@ -432,7 +444,6 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. if (TII->isAsCheapAsAMove(*MI)) { - MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); if (CSBB != BB && !CSBB->isSuccessor(BB)) return false; @@ -487,7 +498,7 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) { ScopeMap.erase(SI); } -bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { +bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { bool Changed = false; SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; @@ -536,7 +547,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // It's also not safe if the instruction uses physical registers. 
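+ // PhysDefs entries are (operand index, register) pairs; the index part lets
+ // the code below clear the dead flag on the matching def operand of CSMI
+ // whenever MI's corresponding def is live.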
bool CrossMBBPhysDef = false; SmallSet<unsigned, 8> PhysRefs; - SmallVector<unsigned, 2> PhysDefs; + PhysDefVector PhysDefs; bool PhysUseDef = false; if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) { @@ -597,7 +608,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { TargetRegisterInfo::isVirtualRegister(NewReg) && "Do not CSE physical register defs!"); - if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { + if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) { LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); DoCSE = false; break; @@ -635,6 +646,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // we should make sure it is not dead at CSMI. for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate) CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false); + for (auto PhysDef : PhysDefs) + if (!MI->getOperand(PhysDef.first).isDead()) + CSMI->getOperand(PhysDef.first).setIsDead(false); // Go through implicit defs of CSMI and MI, and clear the kill flags on // their uses in all the instructions between CSMI and MI. @@ -663,9 +677,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Add physical register defs now coming in from a predecessor to MBB // livein list. while (!PhysDefs.empty()) { - unsigned LiveIn = PhysDefs.pop_back_val(); - if (!MBB->isLiveIn(LiveIn)) - MBB->addLiveIn(LiveIn); + auto LiveIn = PhysDefs.pop_back_val(); + if (!MBB->isLiveIn(LiveIn.second)) + MBB->addLiveIn(LiveIn.second); } ++NumCrossBBCSEs; } @@ -734,7 +748,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { for (MachineDomTreeNode *Node : Scopes) { MachineBasicBlock *MBB = Node->getBlock(); EnterScope(MBB); - Changed |= ProcessBlock(MBB); + Changed |= ProcessBlockCSE(MBB); // If it's a leaf node, it's done. Traverse upwards to pop ancestors. ExitScopeIfDone(Node, OpenChildren); } @@ -742,6 +756,104 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { return Changed; } +// We use stronger checks for PRE candidates than for CSE ones, so as to also +// cover the checks inside ProcessBlockCSE(), not only those inside +// isCSECandidate(). This helps to exclude instrs created by PRE that won't be +// CSEed later. +bool MachineCSE::isPRECandidate(MachineInstr *MI) { + if (!isCSECandidate(MI) || + MI->isNotDuplicable() || + MI->mayLoad() || + MI->isAsCheapAsAMove() || + MI->getNumDefs() != 1 || + MI->getNumExplicitDefs() != 1) + return false; + + for (auto def : MI->defs()) + if (!TRI->isVirtualRegister(def.getReg())) + return false; + + for (auto use : MI->uses()) + if (use.isReg() && !TRI->isVirtualRegister(use.getReg())) + return false; + + return true; +} + +bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, + MachineBasicBlock *MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { + MachineInstr *MI = &*I; + ++I; + + if (!isPRECandidate(MI)) + continue; + + if (!PREMap.count(MI)) { + PREMap[MI] = MBB; + continue; + } + + auto MBB1 = PREMap[MI]; + assert( + !DT->properlyDominates(MBB, MBB1) && + "MBB cannot properly dominate MBB1 while DFS through dominators tree!"); + auto CMBB = DT->findNearestCommonDominator(MBB, MBB1); + if (!CMBB->isLegalToHoistInto()) + continue; + + // Two instrs are partially redundant if their basic blocks are reachable + // from one to another but neither dominates the other.
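+ // For example (an IR-level sketch, not from the source): in
+ //   if (c) { a = x + y; } else { b = x + y; }
+ // neither computation of x + y dominates the other, but hoisting it into
+ // their common dominator makes both occurrences fully redundant for the CSE
+ // step that runs next.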
+ if (CMBB != MBB1) { + auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock(); + if (BB != nullptr && BB1 != nullptr && + (isPotentiallyReachable(BB1, BB) || + isPotentiallyReachable(BB, BB1))) { + + assert(MI->getOperand(0).isDef() && + "First operand of instr with one explicit def must be this def"); + unsigned VReg = MI->getOperand(0).getReg(); + unsigned NewReg = MRI->cloneVirtualRegister(VReg); + if (!isProfitableToCSE(NewReg, VReg, CMBB, MI)) + continue; + MachineInstr &NewMI = + TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI); + NewMI.getOperand(0).setReg(NewReg); + + PREMap[MI] = CMBB; + ++NumPREs; + Changed = true; + } + } + } + return Changed; +} + +// This simple PRE (partial redundancy elimination) pass doesn't actually +// eliminate partial redundancy but transforms it to full redundancy, +// anticipating that the next CSE step will eliminate this created redundancy. +// If CSE doesn't eliminate this, then the created instruction will remain dead +// and will be eliminated later by the Remove Dead Machine Instructions pass. +bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) { + SmallVector<MachineDomTreeNode *, 32> BBs; + + PREMap.clear(); + bool Changed = false; + BBs.push_back(DT->getRootNode()); + do { + auto Node = BBs.pop_back_val(); + const std::vector<MachineDomTreeNode *> &Children = Node->getChildren(); + for (MachineDomTreeNode *Child : Children) + BBs.push_back(Child); + + MachineBasicBlock *MBB = Node->getBlock(); + Changed |= ProcessBlockPRE(DT, MBB); + + } while (!BBs.empty()); + + return Changed; +} + bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -752,5 +864,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); DT = &getAnalysis<MachineDominatorTree>(); LookAheadLimit = TII->getMachineCSELookAheadLimit(); - return PerformCSE(DT->getRootNode()); + bool ChangedPRE, ChangedCSE; + ChangedPRE = PerformSimplePRE(DT); + ChangedCSE = PerformCSE(DT->getRootNode()); + return ChangedPRE || ChangedCSE; } diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index f51b482e20e3..0584ec0bd2b3 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -1,9 +1,8 @@ //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -559,16 +558,15 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { continue; LLVM_DEBUG(if (dump_intrs) { - dbgs() << "\tFor the Pattern (" << (int)P << ") these instructions could be removed\n"; - for (auto const *InstrPtr : DelInstrs) { - dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": "; - InstrPtr->print(dbgs(), false, false, false, TII); - } + dbgs() << "\tFor the Pattern (" << (int)P + << ") these instructions could be removed\n"; + for (auto const *InstrPtr : DelInstrs) + InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false, + /*SkipDebugLoc*/false, /*AddNewLine*/true, TII); dbgs() << "\tThese instructions could replace the removed ones\n"; - for (auto const *InstrPtr : InsInstrs) { - dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": "; - InstrPtr->print(dbgs(), false, false, false, TII); - } + for (auto const *InstrPtr : InsInstrs) + InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false, + /*SkipDebugLoc*/false, /*AddNewLine*/true, TII); }); bool SubstituteAlways = false; @@ -641,7 +639,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { MLI = &getAnalysis<MachineLoopInfo>(); Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = nullptr; - OptSize = MF.getFunction().optForSize(); + OptSize = MF.getFunction().hasOptSize(); LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) { diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 19879fe89007..9fc12ac89e12 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -1,9 +1,8 @@ //===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp index b559e4e513a6..6704298c17d6 100644 --- a/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/lib/CodeGen/MachineDominanceFrontier.cpp @@ -1,9 +1,8 @@ //===- MachineDominanceFrontier.cpp ---------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 6b2802626456..1dfba8638c22 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -1,9 +1,8 @@ //===- MachineDominators.cpp - Machine Dominator Calculation --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp index 0b316871dbdf..bae3a4333bda 100644 --- a/lib/CodeGen/MachineFrameInfo.cpp +++ b/lib/CodeGen/MachineFrameInfo.cpp @@ -1,9 +1,8 @@ //===-- MachineFrameInfo.cpp ---------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -57,7 +56,8 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, !IsSpillSlot, StackID)); int Index = (int)Objects.size() - NumFixedObjects - 1; assert(Index >= 0 && "Bad frame index!"); - ensureMaxAlignment(Alignment); + if (StackID == 0) + ensureMaxAlignment(Alignment); return Index; } @@ -92,7 +92,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Alignment, SPOffset, IsImmutable, - /*isSpillSlot=*/false, /*Alloca=*/nullptr, + /*IsSpillSlot=*/false, /*Alloca=*/nullptr, IsAliased)); return -++NumFixedObjects; } @@ -142,11 +142,15 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { // should keep in mind that there's tight coupling between the two. for (int i = getObjectIndexBegin(); i != 0; ++i) { + // Only estimate stack size of default stack. + if (getStackID(i) != TargetStackID::Default) + continue; int FixedOff = -getObjectOffset(i); if (FixedOff > Offset) Offset = FixedOff; } for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { - if (isDeadObjectIndex(i)) + // Only estimate stack size of live objects on default stack. + if (isDeadObjectIndex(i) || getStackID(i) != TargetStackID::Default) continue; Offset += getObjectSize(i); unsigned Align = getObjectAlignment(i); diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 3495319670a5..4df5ce2dcedc 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -1,9 +1,8 @@ //===- MachineFunction.cpp ------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -44,6 +43,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -165,7 +165,7 @@ void MachineFunction::init() { !F.hasFnAttribute("no-realign-stack"); FrameInfo = new (Allocator) MachineFrameInfo( getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP, - /*ForceRealign=*/CanRealignSP && + /*ForcedRealign=*/CanRealignSP && F.hasFnAttribute(Attribute::StackAlignment)); if (F.hasFnAttribute(Attribute::StackAlignment)) @@ -175,7 +175,7 @@ void MachineFunction::init() { Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on F. - // FIXME: Use Function::optForSize(). + // FIXME: Use Function::hasOptSize(). if (!F.hasFnAttribute(Attribute::OptimizeForSize)) Alignment = std::max(Alignment, STI->getTargetLowering()->getPrefFunctionAlignment()); @@ -274,6 +274,12 @@ bool MachineFunction::shouldSplitStack() const { return getFunction().hasFnAttribute("split-stack"); } +LLVM_NODISCARD unsigned +MachineFunction::addFrameInst(const MCCFIInstruction &Inst) { + FrameInstructions.push_back(Inst); + return FrameInstructions.size() - 1; +} + /// This discards all of the MachineBasicBlock numbers and recomputes them. /// This guarantees that the MBB numbers are sequential, dense, and match the /// ordering of the blocks within the function. If a specific MachineBasicBlock @@ -357,6 +363,13 @@ MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB, /// ~MachineInstr() destructor must be empty. void MachineFunction::DeleteMachineInstr(MachineInstr *MI) { + // Verify that the call site info is in a valid state. This assertion should + // be triggered during the implementation of call site info support for + // a new architecture. If the assertion is triggered, the backtrace will + // tell you where to insert a call to updateCallSiteInfo(). + assert((!MI->isCall(MachineInstr::IgnoreBundle) || + CallSitesInfo.find(MI) == CallSitesInfo.end()) && + "Call site info was not updated!"); // Strip it for parts. The operand array and the MI object itself are // independently recyclable. if (MI->Operands) @@ -396,19 +409,18 @@ MachineMemOperand *MachineFunction::getMachineMemOperand( MachineMemOperand * MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size) { - if (MMO->getValue()) - return new (Allocator) - MachineMemOperand(MachinePointerInfo(MMO->getValue(), - MMO->getOffset()+Offset), - MMO->getFlags(), Size, MMO->getBaseAlignment(), - AAMDNodes(), nullptr, MMO->getSyncScopeID(), - MMO->getOrdering(), MMO->getFailureOrdering()); + const MachinePointerInfo &PtrInfo = MMO->getPointerInfo(); + + // If there is no pointer value, the offset isn't tracked so we need to adjust + // the base alignment. + unsigned Align = PtrInfo.V.isNull() + ? 
MinAlign(MMO->getBaseAlignment(), Offset) + : MMO->getBaseAlignment(); + return new (Allocator) - MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(), - MMO->getOffset()+Offset), - MMO->getFlags(), Size, MMO->getBaseAlignment(), - AAMDNodes(), nullptr, MMO->getSyncScopeID(), - MMO->getOrdering(), MMO->getFailureOrdering()); + MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size, + Align, AAMDNodes(), nullptr, MMO->getSyncScopeID(), + MMO->getOrdering(), MMO->getFailureOrdering()); } MachineMemOperand * @@ -425,6 +437,15 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MMO->getOrdering(), MMO->getFailureOrdering()); } +MachineMemOperand * +MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, + MachineMemOperand::Flags Flags) { + return new (Allocator) MachineMemOperand( + MMO->getPointerInfo(), Flags, MMO->getSize(), MMO->getBaseAlignment(), + MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(), + MMO->getOrdering(), MMO->getFailureOrdering()); +} + MachineInstr::ExtraInfo * MachineFunction::createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol, @@ -802,6 +823,32 @@ try_next:; return FilterID; } +void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD) { + MCSymbol *BeginLabel = Ctx.createTempSymbol("heapallocsite", true); + MCSymbol *EndLabel = Ctx.createTempSymbol("heapallocsite", true); + I->setPreInstrSymbol(*this, BeginLabel); + I->setPostInstrSymbol(*this, EndLabel); + + DIType *DI = dyn_cast<DIType>(MD); + CodeViewHeapAllocSites.push_back(std::make_tuple(BeginLabel, EndLabel, DI)); +} + +void MachineFunction::updateCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New) { + if (!Target.Options.EnableDebugEntryValues || Old == New) + return; + + assert(Old->isCall() && (!New || New->isCall()) && + "Call site info refers only to call instructions!"); + CallSiteInfoMap::iterator CSIt = CallSitesInfo.find(Old); + if (CSIt == CallSitesInfo.end()) + return; + CallSiteInfo CSInfo = std::move(CSIt->second); + CallSitesInfo.erase(CSIt); + if (New) + CallSitesInfo[New] = CSInfo; +} + /// \} //===----------------------------------------------------------------------===// @@ -888,9 +935,11 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { OS << "Jump Tables:\n"; for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) { - OS << printJumpTableEntryReference(i) << ": "; + OS << printJumpTableEntryReference(i) << ':'; for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j) OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]); + if (i != e) + OS << '\n'; } OS << '\n'; diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index 5db4e299fa70..0da4cf3fc90c 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -1,9 +1,8 @@ //===-- MachineFunctionPass.cpp -------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index 9c96ba748778..0ea8975cc74c 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -1,9 +1,8 @@ //===-- MachineFunctionPrinterPass.cpp ------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 764a84c7e132..e5c398a2d10c 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1,9 +1,8 @@ //===- lib/CodeGen/MachineInstr.cpp ---------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -26,6 +25,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" @@ -50,9 +50,9 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/IR/Operator.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" @@ -225,12 +225,13 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { } #ifndef NDEBUG - bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata; + bool isDebugOp = Op.getType() == MachineOperand::MO_Metadata || + Op.getType() == MachineOperand::MO_MCSymbol; // OpNo now points as the desired insertion point. Unless this is a variadic // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). // RegMask operands go between the explicit and implicit operands. assert((isImpReg || Op.isRegMask() || MCID->isVariadic() || - OpNo < MCID->getNumOperands() || isMetaDataOp) && + OpNo < MCID->getNumOperands() || isDebugOp) && "Trying to add an operand to a machine instr that is already done!"); #endif @@ -512,45 +513,65 @@ void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { MF.createMIExtraInfo(memoperands(), getPreInstrSymbol(), Symbol)); } +void MachineInstr::cloneInstrSymbols(MachineFunction &MF, + const MachineInstr &MI) { + if (this == &MI) + // Nothing to do for a self-clone! 
+ return; + + assert(&MF == MI.getMF() && + "Invalid machine functions when cloning instruction symbols!"); + + setPreInstrSymbol(MF, MI.getPreInstrSymbol()); + setPostInstrSymbol(MF, MI.getPostInstrSymbol()); +} + uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { // For now, just return the union of the flags. If the flags get more // complicated over time, we might need more logic here. return getFlags() | Other.getFlags(); } -void MachineInstr::copyIRFlags(const Instruction &I) { +uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { + uint16_t MIFlags = 0; // Copy the wrapping flags. if (const OverflowingBinaryOperator *OB = dyn_cast<OverflowingBinaryOperator>(&I)) { if (OB->hasNoSignedWrap()) - setFlag(MachineInstr::MIFlag::NoSWrap); + MIFlags |= MachineInstr::MIFlag::NoSWrap; if (OB->hasNoUnsignedWrap()) - setFlag(MachineInstr::MIFlag::NoUWrap); + MIFlags |= MachineInstr::MIFlag::NoUWrap; } // Copy the exact flag. if (const PossiblyExactOperator *PE = dyn_cast<PossiblyExactOperator>(&I)) if (PE->isExact()) - setFlag(MachineInstr::MIFlag::IsExact); + MIFlags |= MachineInstr::MIFlag::IsExact; // Copy the fast-math flags. if (const FPMathOperator *FP = dyn_cast<FPMathOperator>(&I)) { const FastMathFlags Flags = FP->getFastMathFlags(); if (Flags.noNaNs()) - setFlag(MachineInstr::MIFlag::FmNoNans); + MIFlags |= MachineInstr::MIFlag::FmNoNans; if (Flags.noInfs()) - setFlag(MachineInstr::MIFlag::FmNoInfs); + MIFlags |= MachineInstr::MIFlag::FmNoInfs; if (Flags.noSignedZeros()) - setFlag(MachineInstr::MIFlag::FmNsz); + MIFlags |= MachineInstr::MIFlag::FmNsz; if (Flags.allowReciprocal()) - setFlag(MachineInstr::MIFlag::FmArcp); + MIFlags |= MachineInstr::MIFlag::FmArcp; if (Flags.allowContract()) - setFlag(MachineInstr::MIFlag::FmContract); + MIFlags |= MachineInstr::MIFlag::FmContract; if (Flags.approxFunc()) - setFlag(MachineInstr::MIFlag::FmAfn); + MIFlags |= MachineInstr::MIFlag::FmAfn; if (Flags.allowReassoc()) - setFlag(MachineInstr::MIFlag::FmReassoc); + MIFlags |= MachineInstr::MIFlag::FmReassoc; } + + return MIFlags; +} + +void MachineInstr::copyIRFlags(const Instruction &I) { + Flags = copyFlagsFromInstruction(I); } bool MachineInstr::hasPropertyInBundle(uint64_t Mask, QueryType Type) const { @@ -1157,7 +1178,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { } if (isPosition() || isDebugInstr() || isTerminator() || - hasUnmodeledSideEffects()) + mayRaiseFPException() || hasUnmodeledSideEffects()) return false; // See if this instruction does a load. If so, we have to guarantee that the @@ -1173,8 +1194,8 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { return true; } -bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, - bool UseTBAA) { +bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other, + bool UseTBAA) const { const MachineFunction *MF = getMF(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const MachineFrameInfo &MFI = MF->getFrameInfo(); @@ -1304,7 +1325,11 @@ bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const { const MachineFrameInfo &MFI = getParent()->getParent()->getFrameInfo(); for (MachineMemOperand *MMO : memoperands()) { - if (MMO->isVolatile()) return false; + if (!MMO->isUnordered()) + // If the memory operand has ordering side effects, we can't move the + // instruction. Such an instruction is technically an invariant load, + // but the caller code would need to be updated to expect that.
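+ // (E.g. a volatile load or an acquire atomic load fails the isUnordered()
+ // check here.)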
+ return false; if (MMO->isStore()) return false; if (MMO->isInvariant() && MMO->isDereferenceable()) continue; @@ -1447,7 +1472,7 @@ void MachineInstr::print(raw_ostream &OS, bool IsStandalone, bool SkipOpers, ModuleSlotTracker MST(M); if (F) MST.incorporateFunction(*F); - print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, TII); + print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, AddNewLine, TII); } void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, @@ -1519,6 +1544,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nsw "; if (getFlag(MachineInstr::IsExact)) OS << "exact "; + if (getFlag(MachineInstr::FPExcept)) + OS << "fpexcept "; // Print the opcode name. if (TII) @@ -1905,7 +1932,7 @@ void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) { void MachineInstr::addRegisterDefined(unsigned Reg, const TargetRegisterInfo *RegInfo) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - MachineOperand *MO = findRegisterDefOperand(Reg, false, RegInfo); + MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo); if (MO) return; } else { @@ -2050,7 +2077,7 @@ static const DIExpression *computeExprForSpill(const MachineInstr &MI) { const DIExpression *Expr = MI.getDebugExpression(); if (MI.isIndirectDebugValue()) { assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset"); - Expr = DIExpression::prepend(Expr, DIExpression::WithDeref); + Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); } return Expr; } @@ -2100,3 +2127,54 @@ void MachineInstr::changeDebugValuesDefReg(unsigned Reg) { for (auto *DBI : DbgValues) DBI->getOperand(0).setReg(Reg); } + +using MMOList = SmallVector<const MachineMemOperand *, 2>; + +static unsigned getSpillSlotSize(MMOList &Accesses, + const MachineFrameInfo &MFI) { + unsigned Size = 0; + for (auto A : Accesses) + if (MFI.isSpillSlotObjectIndex( + cast<FixedStackPseudoSourceValue>(A->getPseudoValue()) + ->getFrameIndex())) + Size += A->getSize(); + return Size; +} + +Optional<unsigned> +MachineInstr::getSpillSize(const TargetInstrInfo *TII) const { + int FI; + if (TII->isStoreToStackSlotPostFE(*this, FI)) { + const MachineFrameInfo &MFI = getMF()->getFrameInfo(); + if (MFI.isSpillSlotObjectIndex(FI)) + return (*memoperands_begin())->getSize(); + } + return None; +} + +Optional<unsigned> +MachineInstr::getFoldedSpillSize(const TargetInstrInfo *TII) const { + MMOList Accesses; + if (TII->hasStoreToStackSlot(*this, Accesses)) + return getSpillSlotSize(Accesses, getMF()->getFrameInfo()); + return None; +} + +Optional<unsigned> +MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const { + int FI; + if (TII->isLoadFromStackSlotPostFE(*this, FI)) { + const MachineFrameInfo &MFI = getMF()->getFrameInfo(); + if (MFI.isSpillSlotObjectIndex(FI)) + return (*memoperands_begin())->getSize(); + } + return None; +} + +Optional<unsigned> +MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const { + MMOList Accesses; + if (TII->hasLoadFromStackSlot(*this, Accesses)) + return getSpillSlotSize(Accesses, getMF()->getFrameInfo()); + return None; +} diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index ae378cc8c464..32e266e9401e 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -1,9 +1,8 @@ //===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// 
License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 58fd1f238420..1107e609c258 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -1,9 +1,8 @@ //===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 2bce59235057..3b8b430d1b0f 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -1,9 +1,8 @@ //===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 6ef8de88f8b1..aadcd7319799 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -206,11 +205,11 @@ MachineModuleInfo::~MachineModuleInfo() = default; bool MachineModuleInfo::doInitialization(Module &M) { ObjFileMMI = nullptr; CurCallSite = 0; - UsesVAFloatArgument = UsesMorestackAddr = false; + UsesMSVCFloatingPoint = UsesMorestackAddr = false; HasSplitStack = HasNosplitStack = false; AddrLabelSymbols = nullptr; TheModule = &M; - DbgInfoAvailable = !empty(M.debug_compile_units()); + DbgInfoAvailable = !llvm::empty(M.debug_compile_units()); return false; } @@ -328,22 +327,3 @@ char FreeMachineFunction::ID; FunctionPass *llvm::createFreeMachineFunctionPass() { return new FreeMachineFunction(); } - -//===- MMI building helpers -----------------------------------------------===// - -void llvm::computeUsesVAFloatArgument(const CallInst &I, - MachineModuleInfo &MMI) { - FunctionType *FT = - cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); - if (FT->isVarArg() && !MMI.usesVAFloatArgument()) { - for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - Type *T = I.getArgOperand(i)->getType(); - for (auto i : post_order(T)) { - if (i->isFloatingPointTy()) { - MMI.setUsesVAFloatArgument(true); - return; - } - } - } - } -} diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 7b4f64bfe60d..16d24880ebe4 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/MachineModuleInfoImpls.cpp ----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp index 05e51e1873cf..4fa4ea7f6cf5 100644 --- a/lib/CodeGen/MachineOperand.cpp +++ b/lib/CodeGen/MachineOperand.cpp @@ -1,9 +1,8 @@ //===- lib/CodeGen/MachineOperand.cpp -------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -25,6 +24,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" @@ -181,6 +181,19 @@ void MachineOperand::ChangeToES(const char *SymName, setTargetFlags(TargetFlags); } +void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset, + unsigned char TargetFlags) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into a global address"); + + removeRegFromUses(); + + OpKind = MO_GlobalAddress; + Contents.OffsetedInfo.Val.GV = GV; + setOffset(Offset); + setTargetFlags(TargetFlags); +} + void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an MCSymbol"); @@ -329,7 +342,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) { switch (MO.getType()) { case MachineOperand::MO_Register: // Register operands don't have target flags. - return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef()); + return hash_combine(MO.getType(), (unsigned)MO.getReg(), MO.getSubReg(), MO.isDef()); case MachineOperand::MO_Immediate: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); case MachineOperand::MO_CImmediate: @@ -348,7 +361,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); case MachineOperand::MO_ExternalSymbol: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), - MO.getSymbolName()); + StringRef(MO.getSymbolName())); case MachineOperand::MO_GlobalAddress: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(), MO.getOffset()); @@ -994,7 +1007,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() || isa<PointerType>(PtrInfo.V.get<const Value *>()->getType())) && "invalid pointer value"); - assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); + assert(getBaseAlignment() == a && a != 0 && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); AtomicInfo.SSID = static_cast<unsigned>(SSID); @@ -1125,7 +1138,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, printLLVMNameWithoutPrefix( OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol()); break; - case PseudoSourceValue::TargetCustom: + default: // FIXME: This is not necessarily the correct MIR serialization format for // a custom pseudo source value, but at least it allows // -print-machineinstrs to work on a target with custom pseudo source diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 906d5560d568..27db9106b337 100644 --- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -1,9 +1,8 @@ ///===- MachineOptimizationRemarkEmitter.cpp - Opt Diagnostic -*- C++ -*---===// /// -/// The LLVM Compiler Infrastructure -/// -/// This file is distributed under the University of Illinois Open Source -/// License. See LICENSE.TXT for details. +/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +/// See https://llvm.org/LICENSE.txt for license information. 
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception /// ///===---------------------------------------------------------------------===// /// \file diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index ad96c0e579e4..80a235aeaa5c 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -1,9 +1,8 @@ //===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// @@ -74,8 +73,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <functional> -#include <map> -#include <sstream> #include <tuple> #include <vector> @@ -1095,19 +1092,15 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) { - // Create the function name. This should be unique. For now, just hash the - // module name and include it in the function name plus the number of this - // function. - std::ostringstream NameStream; + // Create the function name. This should be unique. // FIXME: We should have a better naming scheme. This should be stable, // regardless of changes to the outliner's cost model/traversal order. - NameStream << "OUTLINED_FUNCTION_" << Name; + std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str(); // Create the function using an IR-level function. LLVMContext &C = M.getContext(); - Function *F = dyn_cast<Function>( - M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C))); - assert(F && "Function was null!"); + Function *F = Function::Create(FunctionType::get(Type::getVoidTy(C), false), + Function::ExternalLinkage, FunctionName, M); // NOTE: If this is linkonceodr, then we can take advantage of linker deduping // which gives us better results when we outline from linkonceodr functions. @@ -1205,11 +1198,10 @@ bool MachineOutliner::outline(Module &M, unsigned OutlinedFunctionNum = 0; // Sort by benefit. The most beneficial functions should be outlined first. - std::stable_sort( - FunctionList.begin(), FunctionList.end(), - [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) { - return LHS.getBenefit() > RHS.getBenefit(); - }); + llvm::stable_sort(FunctionList, [](const OutlinedFunction &LHS, + const OutlinedFunction &RHS) { + return LHS.getBenefit() > RHS.getBenefit(); + }); // Walk over each function, outlining them as we go along. Functions are // outlined greedily, based off the sort above. @@ -1253,8 +1245,9 @@ bool MachineOutliner::outline(Module &M, if (MBB.getParent()->getProperties().hasProperty( MachineFunctionProperties::Property::TracksLiveness)) { // Helper lambda for adding implicit def operands to the call - // instruction. - auto CopyDefs = [&CallInst](MachineInstr &MI) { + // instruction. It also updates call site information for moved + // code. + auto CopyDefsAndUpdateCalls = [&CallInst](MachineInstr &MI) { for (MachineOperand &MOP : MI.operands()) { // Skip over anything that isn't a register. 
if (!MOP.isReg()) @@ -1266,13 +1259,16 @@ bool MachineOutliner::outline(Module &M, MOP.getReg(), true, /* isDef = true */ true /* isImp = true */)); } + if (MI.isCall()) + MI.getMF()->updateCallSiteInfo(&MI); }; // Copy over the defs in the outlined range. // First inst in outlined range <-- Anything that's defined in this // ... .. range has to be added as an // implicit Last inst in outlined range <-- def to the call - // instruction. - std::for_each(CallInst, std::next(EndIt), CopyDefs); + // instruction. Also remove call site information for outlined block + // of code. + std::for_each(CallInst, std::next(EndIt), CopyDefsAndUpdateCalls); } // Erase from the point after where the call was inserted up to, and diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index 4d451bdd7f69..54df522d371a 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -1,9 +1,8 @@ //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -97,6 +96,14 @@ using namespace llvm; STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline"); STATISTIC(NumPipelined, "Number of loops software pipelined"); STATISTIC(NumNodeOrderIssues, "Number of node order issues found"); +STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch"); +STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop"); +STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader"); +STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large"); +STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII"); +STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found"); +STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage"); +STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages"); /// A command line option to turn software pipelining on or off. static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), @@ -141,6 +148,11 @@ static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII")); +static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden, + cl::init(false)); +static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden, + cl::init(false)); + namespace llvm { // A command line option to enable the CopyToPhi DAG mutation. @@ -180,6 +192,16 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { !EnableSWPOptSize.getPosition()) return false; + if (!mf.getSubtarget().enableMachinePipeliner()) + return false; + + // Cannot pipeline loops without instruction itineraries if we are using + // DFA for the pipeliner. 
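+ // (The DFA resource model is driven entirely by itinerary data, so an absent or empty itinerary would leave it with nothing to consult.)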
+ if (mf.getSubtarget().useDFAforSMS() && + (!mf.getSubtarget().getInstrItineraryData() || + mf.getSubtarget().getInstrItineraryData()->isEmpty())) + return false; + MF = &mf; MLI = &getAnalysis<MachineLoopInfo>(); MDT = &getAnalysis<MachineDominatorTree>(); @@ -211,8 +233,11 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { } #endif - if (!canPipelineLoop(L)) + setPragmaPipelineOptions(L); + if (!canPipelineLoop(L)) { + LLVM_DEBUG(dbgs() << "\n!!! Cannot pipeline loop.\n"); return Changed; + } ++NumTrytoPipeline; @@ -221,6 +246,50 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { return Changed; } +void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) { + MachineBasicBlock *LBLK = L.getTopBlock(); + + if (LBLK == nullptr) + return; + + const BasicBlock *BBLK = LBLK->getBasicBlock(); + if (BBLK == nullptr) + return; + + const Instruction *TI = BBLK->getTerminator(); + if (TI == nullptr) + return; + + MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop); + if (LoopID == nullptr) + return; + + assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); + assert(LoopID->getOperand(0) == LoopID && "invalid loop"); + + for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { + MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); + + if (MD == nullptr) + continue; + + MDString *S = dyn_cast<MDString>(MD->getOperand(0)); + + if (S == nullptr) + continue; + + if (S->getString() == "llvm.loop.pipeline.initiationinterval") { + assert(MD->getNumOperands() == 2 && + "Pipeline initiation interval hint metadata should have two operands."); + II_setByPragma = + mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue(); + assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive."); + } else if (S->getString() == "llvm.loop.pipeline.disable") { + disabledByPragma = true; + } + } +} + /// Return true if the loop can be software pipelined. The algorithm is /// restricted to loops with a single basic block. Make sure that the /// branch in the loop can be analyzed. @@ -228,21 +297,36 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { if (L.getNumBlocks() != 1) return false; + if (disabledByPragma) + return false; + // Check if the branch can't be understood because we can't do pipelining // if that's the case. LI.TBB = nullptr; LI.FBB = nullptr; LI.BrCond.clear(); - if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) + if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) { + LLVM_DEBUG( + dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n"); + NumFailBranch++; return false; + } LI.LoopInductionVar = nullptr; LI.LoopCompare = nullptr; - if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) + if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) { + LLVM_DEBUG( + dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n"); + NumFailLoop++; return false; + } - if (!L.getLoopPreheader()) + if (!L.getLoopPreheader()) { + LLVM_DEBUG( + dbgs() << "Preheader not found, can NOT pipeline current Loop\n"); + NumFailPreheader++; return false; + } // Remove any subregisters from inputs to phi nodes.
preprocessPhiNodes(*L.getHeader()); @@ -286,7 +370,8 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) { bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) { assert(L.getBlocks().size() == 1 && "SMS works on single blocks only."); - SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo); + SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo, + II_setByPragma); MachineBasicBlock *MBB = L.getHeader(); // The kernel should not include any terminator instructions. These @@ -309,6 +394,20 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) { return SMS.hasNewSchedule(); } +void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) { + if (II_setByPragma > 0) + MII = II_setByPragma; + else + MII = std::max(ResMII, RecMII); +} + +void SwingSchedulerDAG::setMAX_II() { + if (II_setByPragma > 0) + MAX_II = II_setByPragma; + else + MAX_II = MII + 10; +} + /// We override the schedule function in ScheduleDAGInstrs to implement the /// scheduling part of the Swing Modulo Scheduling algorithm. void SwingSchedulerDAG::schedule() { @@ -335,17 +434,28 @@ void SwingSchedulerDAG::schedule() { if (SwpIgnoreRecMII) RecMII = 0; - MII = std::max(ResMII, RecMII); - LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII - << ", res=" << ResMII << ")\n"); + setMII(ResMII, RecMII); + setMAX_II(); + + LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II + << " (rec=" << RecMII << ", res=" << ResMII << ")\n"); // Can't schedule a loop without a valid MII. - if (MII == 0) + if (MII == 0) { + LLVM_DEBUG( + dbgs() + << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n"); + NumFailZeroMII++; return; + } // Don't pipeline large loops. - if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) + if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) { + LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii + << ", we don't pipeline large loops\n"); + NumFailLargeMaxMII++; return; + } computeNodeFunctions(NodeSets); @@ -362,7 +472,7 @@ void SwingSchedulerDAG::schedule() { } }); - std::stable_sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>()); + llvm::stable_sort(NodeSets, std::greater<NodeSet>()); groupRemainingNodes(NodeSets); @@ -383,17 +493,27 @@ void SwingSchedulerDAG::schedule() { SMSchedule Schedule(Pass.MF); Scheduled = schedulePipeline(Schedule); - if (!Scheduled) + if (!Scheduled) { + LLVM_DEBUG(dbgs() << "No schedule found, return\n"); + NumFailNoSchedule++; return; + } unsigned numStages = Schedule.getMaxStageCount(); // No need to generate pipeline if there are no overlapped iterations. - if (numStages == 0) + if (numStages == 0) { + LLVM_DEBUG( + dbgs() << "No overlapped iterations, no need to generate pipeline\n"); + NumFailZeroStage++; return; - + } // Check that the maximum stage count is less than user-defined limit. - if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) + if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) { + LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages + << " : too many stages, abort\n"); + NumFailLargeMaxStage++; return; + } generatePipelinedLoop(Schedule); ++NumPipelined; @@ -467,7 +587,8 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) { /// Return true if the instruction causes a chain between memory /// references before and after it.
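/// Calls, instructions that may raise FP exceptions or have unmodeled side
/// effects, and ordered memory references (other than dereferenceable
/// invariant loads) all act as barriers here.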
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) { - return MI.isCall() || MI.hasUnmodeledSideEffects() || + return MI.isCall() || MI.mayRaiseFPException() || + MI.hasUnmodeledSideEffects() || (MI.hasOrderedMemoryRef() && (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA))); } @@ -475,16 +596,16 @@ static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) { /// Return the underlying objects for the memory references of an instruction. /// This function calls the code in ValueTracking, but first checks that the /// instruction has a memory operand. -static void getUnderlyingObjects(MachineInstr *MI, - SmallVectorImpl<Value *> &Objs, +static void getUnderlyingObjects(const MachineInstr *MI, + SmallVectorImpl<const Value *> &Objs, const DataLayout &DL) { if (!MI->hasOneMemOperand()) return; MachineMemOperand *MM = *MI->memoperands_begin(); if (!MM->getValue()) return; - GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL); - for (Value *V : Objs) { + GetUnderlyingObjects(MM->getValue(), Objs, DL); + for (const Value *V : Objs) { if (!isIdentifiedObject(V)) { Objs.clear(); return; @@ -498,7 +619,7 @@ static void getUnderlyingObjects(MachineInstr *MI, /// dependence. This code is very similar to the code in ScheduleDAGInstrs /// but that code doesn't create loop carried dependences. void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { - MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads; + MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads; Value *UnknownValue = UndefValue::get(Type::getVoidTy(MF.getFunction().getContext())); for (auto &SU : SUnits) { @@ -506,7 +627,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { if (isDependenceBarrier(MI, AA)) PendingLoads.clear(); else if (MI.mayLoad()) { - SmallVector<Value *, 4> Objs; + SmallVector<const Value *, 4> Objs; getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); if (Objs.empty()) Objs.push_back(UnknownValue); @@ -515,12 +636,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { SUs.push_back(&SU); } } else if (MI.mayStore()) { - SmallVector<Value *, 4> Objs; + SmallVector<const Value *, 4> Objs; getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); if (Objs.empty()) Objs.push_back(UnknownValue); for (auto V : Objs) { - MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I = + MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I = PendingLoads.find(V); if (I == PendingLoads.end()) continue; @@ -531,7 +652,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { // First, perform the cheaper check that compares the base register. // If they are the same and the load offset is less than the store // offset, then mark the dependence as loop carried potentially. - MachineOperand *BaseOp1, *BaseOp2; + const MachineOperand *BaseOp1, *BaseOp2; int64_t Offset1, Offset2; if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) && TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) { @@ -744,27 +865,55 @@ namespace { // the number of functional unit choices. struct FuncUnitSorter { const InstrItineraryData *InstrItins; + const MCSubtargetInfo *STI; DenseMap<unsigned, unsigned> Resources; - FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {} + FuncUnitSorter(const TargetSubtargetInfo &TSI) + : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {} // Compute the number of functional unit alternatives needed // at each stage, and take the minimum value. 
We prioritize the // instructions by the least number of choices first. unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const { - unsigned schedClass = Inst->getDesc().getSchedClass(); + unsigned SchedClass = Inst->getDesc().getSchedClass(); unsigned min = UINT_MAX; - for (const InstrStage *IS = InstrItins->beginStage(schedClass), - *IE = InstrItins->endStage(schedClass); - IS != IE; ++IS) { - unsigned funcUnits = IS->getUnits(); - unsigned numAlternatives = countPopulation(funcUnits); - if (numAlternatives < min) { - min = numAlternatives; - F = funcUnits; + if (InstrItins && !InstrItins->isEmpty()) { + for (const InstrStage &IS : + make_range(InstrItins->beginStage(SchedClass), + InstrItins->endStage(SchedClass))) { + unsigned funcUnits = IS.getUnits(); + unsigned numAlternatives = countPopulation(funcUnits); + if (numAlternatives < min) { + min = numAlternatives; + F = funcUnits; + } } + return min; + } + if (STI && STI->getSchedModel().hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = + STI->getSchedModel().getSchedClassDesc(SchedClass); + if (!SCDesc->isValid()) + // No valid Schedule Class Desc for schedClass, should be + // Pseudo/PostRAPseudo + return min; + + for (const MCWriteProcResEntry &PRE : + make_range(STI->getWriteProcResBegin(SCDesc), + STI->getWriteProcResEnd(SCDesc))) { + if (!PRE.Cycles) + continue; + const MCProcResourceDesc *ProcResource = + STI->getSchedModel().getProcResource(PRE.ProcResourceIdx); + unsigned NumUnits = ProcResource->NumUnits; + if (NumUnits < min) { + min = NumUnits; + F = PRE.ProcResourceIdx; + } + } + return min; } - return min; + llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!"); } // Compute the critical resources needed by the instruction. This @@ -774,13 +923,34 @@ struct FuncUnitSorter { // the same, highly used, functional unit have high priority. void calcCriticalResources(MachineInstr &MI) { unsigned SchedClass = MI.getDesc().getSchedClass(); - for (const InstrStage *IS = InstrItins->beginStage(SchedClass), - *IE = InstrItins->endStage(SchedClass); - IS != IE; ++IS) { - unsigned FuncUnits = IS->getUnits(); - if (countPopulation(FuncUnits) == 1) - Resources[FuncUnits]++; + if (InstrItins && !InstrItins->isEmpty()) { + for (const InstrStage &IS : + make_range(InstrItins->beginStage(SchedClass), + InstrItins->endStage(SchedClass))) { + unsigned FuncUnits = IS.getUnits(); + if (countPopulation(FuncUnits) == 1) + Resources[FuncUnits]++; + } + return; + } + if (STI && STI->getSchedModel().hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = + STI->getSchedModel().getSchedClassDesc(SchedClass); + if (!SCDesc->isValid()) + // No valid Schedule Class Desc for schedClass, should be + // Pseudo/PostRAPseudo + return; + + for (const MCWriteProcResEntry &PRE : + make_range(STI->getWriteProcResBegin(SCDesc), + STI->getWriteProcResEnd(SCDesc))) { + if (!PRE.Cycles) + continue; + Resources[PRE.ProcResourceIdx]++; + } + return; } + llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!"); } /// Return true if IS1 has less priority than IS2. @@ -803,14 +973,15 @@ struct FuncUnitSorter { /// to add it to each existing DFA, until a legal space is found. If the /// instruction cannot be reserved in an existing DFA, we create a new one. 
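/// (ResourceManager abstracts the reservation state: it wraps a DFA
/// packetizer on targets that use one, and otherwise falls back to simple
/// per-resource unit counts derived from the machine model.)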
unsigned SwingSchedulerDAG::calculateResMII() { - SmallVector<DFAPacketizer *, 8> Resources; + + LLVM_DEBUG(dbgs() << "calculateResMII:\n"); + SmallVector<ResourceManager*, 8> Resources; MachineBasicBlock *MBB = Loop.getHeader(); - Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget())); + Resources.push_back(new ResourceManager(&MF.getSubtarget())); // Sort the instructions by the number of available choices for scheduling, // least to most. Use the number of critical resources as the tie breaker. - FuncUnitSorter FUS = - FuncUnitSorter(MF.getSubtarget().getInstrItineraryData()); + FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget()); for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(), E = MBB->getFirstTerminator(); I != E; ++I) @@ -832,33 +1003,40 @@ unsigned SwingSchedulerDAG::calculateResMII() { // DFA is needed for each cycle. unsigned NumCycles = getSUnit(MI)->Latency; unsigned ReservedCycles = 0; - SmallVectorImpl<DFAPacketizer *>::iterator RI = Resources.begin(); - SmallVectorImpl<DFAPacketizer *>::iterator RE = Resources.end(); + SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin(); + SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end(); + LLVM_DEBUG({ + dbgs() << "Trying to reserve resource for " << NumCycles + << " cycles for \n"; + MI->dump(); + }); for (unsigned C = 0; C < NumCycles; ++C) while (RI != RE) { - if ((*RI++)->canReserveResources(*MI)) { + if ((*RI)->canReserveResources(*MI)) { + (*RI)->reserveResources(*MI); ++ReservedCycles; break; } + RI++; } - // Start reserving resources using existing DFAs. - for (unsigned C = 0; C < ReservedCycles; ++C) { - --RI; - (*RI)->reserveResources(*MI); - } + LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles + << ", NumCycles:" << NumCycles << "\n"); // Add new resource managers, if needed, to reserve resources. for (unsigned C = ReservedCycles; C < NumCycles; ++C) { - DFAPacketizer *NewResource = - TII->CreateTargetScheduleState(MF.getSubtarget()); + LLVM_DEBUG(if (SwpDebugResource) dbgs() + << "NewResource created to reserve resources" + << "\n"); + ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget()); assert(NewResource->canReserveResources(*MI) && "Reserve error."); NewResource->reserveResources(*MI); Resources.push_back(NewResource); } } int Resmii = Resources.size(); + LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n"); // Delete the memory for each of the resource managers that were created earlier. - for (DFAPacketizer *RI : Resources) { - DFAPacketizer *D = RI; + for (ResourceManager *RI : Resources) { + ResourceManager *D = RI; delete D; } Resources.clear(); @@ -1517,7 +1695,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { } } -/// Add the node to the set, and add all is its connected nodes to the set. +/// Add the node to the set, and add all of its connected nodes to the set. void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet, SetVector<SUnit *> &NodesAdded) { NewSet.insert(SU); @@ -1741,12 +1919,16 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { /// Process the nodes in the computed order and create the pipelined schedule /// of the instructions, if possible. Return true if a schedule is found. bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { - if (NodeOrder.empty()) + + if (NodeOrder.empty()) { + LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n"); return false; + } bool scheduleFound = false; + unsigned II = 0; // Keep increasing II until a valid schedule is found.
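+ // (With a pragma-specified II, setMII() and setMAX_II() pin both loop bounds to that value, so exactly one candidate II is tried.)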
- for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) { + for (II = MII; II <= MAX_II && !scheduleFound; ++II) { Schedule.reset(); Schedule.setInitiationInterval(II); LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n"); @@ -1767,13 +1949,14 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart, II, this); LLVM_DEBUG({ + dbgs() << "\n"; dbgs() << "Inst (" << SU->NodeNum << ") "; SU->getInstr()->dump(); dbgs() << "\n"; }); LLVM_DEBUG({ - dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart - << " me: " << SchedEnd << " ms: " << SchedStart << "\n"; + dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart, + LateStart, SchedEnd, SchedStart); }); if (EarlyStart > LateStart || SchedEnd < EarlyStart || @@ -1818,7 +2001,8 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { scheduleFound = Schedule.isValidSchedule(this); } - LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n"); + LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II + << ")\n"); if (scheduleFound) Schedule.finalizeSchedule(this); @@ -1847,6 +2031,10 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) { InstrMapTy InstrMap; SmallVector<MachineBasicBlock *, 4> PrologBBs; + + MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader(); + assert(PreheaderBB != nullptr && + "Need to add code to handle loops w/o preheader"); // Generate the prolog instructions that set up the pipeline. generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs); MF.insert(BB->getIterator(), KernelBB); @@ -1903,7 +2091,7 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) { removeDeadInstructions(KernelBB, EpilogBBs); // Add branches between prolog and epilog blocks. - addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap); + addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap); // Remove the original loop since it's no longer referenced. for (auto &I : *BB) @@ -2242,7 +2430,7 @@ void SwingSchedulerDAG::generateExistingPhis( // Use the value defined by the Phi, unless we're generating the first // epilog and the Phi refers to a Phi in a different stage. else if (VRMap[PrevStage - np].count(Def) && - (!LoopDefIsPhi || PrevStage != LastStageNum)) + (!LoopDefIsPhi || (PrevStage != LastStageNum) || (LoopValStage == StageScheduled))) PhiOp2 = VRMap[PrevStage - np][Def]; } @@ -2588,7 +2776,8 @@ static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) { /// Create branches from each prolog basic block to the appropriate epilog /// block. These edges are needed if the loop ends before reaching the /// kernel. -void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs, +void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB, + MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, SMSchedule &Schedule, ValueMapTy *VRMap) { @@ -2615,8 +2804,8 @@ void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs, // Check if the LOOP0 has already been removed. If so, then there is no need // to reduce the trip count. if (LC != 0) - LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j, - MaxIter); + LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond, + PrevInsts, j, MaxIter); // Record the value of the first trip count, which is used to determine if // branches and blocks can be removed for constant trip counts. 
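A note on the pragma plumbing in this patch: setPragmaPipelineOptions() looks for "llvm.loop.pipeline.initiationinterval" and "llvm.loop.pipeline.disable" entries in the loop's !llvm.loop metadata. The sketch below shows one plausible source-level origin for that metadata; the pragma spellings are an assumption about the Clang front end, not something this patch itself adds:

// Hypothetical C++ input; assumes Clang's loop-pragma extension lowers
// these spellings to the llvm.loop.pipeline.* metadata read above.
void saxpy(float a, const float *x, float *y, int n) {
#pragma clang loop pipeline_initiation_interval(10) // becomes II_setByPragma = 10
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}

void copyv(const float *x, float *y, int n) {
#pragma clang loop pipeline(disable) // becomes disabledByPragma = true
  for (int i = 0; i < n; ++i)
    y[i] = x[i];
}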
@@ -2657,7 +2846,7 @@ void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs, /// during each iteration. Set Delta to the amount of the change. bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - MachineOperand *BaseOp; + const MachineOperand *BaseOp; int64_t Offset; if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) return false; @@ -2698,7 +2887,9 @@ void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI, return; SmallVector<MachineMemOperand *, 2> NewMMOs; for (MachineMemOperand *MMO : NewMI.memoperands()) { - if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) || + // TODO: Figure out whether isAtomic is really necessary (see D57601). + if (MMO->isVolatile() || MMO->isAtomic() || + (MMO->isInvariant() && MMO->isDereferenceable()) || (!MMO->getValue())) { NewMMOs.push_back(MMO); continue; @@ -3058,6 +3249,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, // Assume ordered loads and stores may have a loop carried dependence. if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() || + SI->mayRaiseFPException() || DI->mayRaiseFPException() || SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef()) return true; @@ -3069,7 +3261,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD)) return true; - MachineOperand *BaseOpS, *BaseOpD; + const MachineOperand *BaseOpS, *BaseOpD; int64_t OffsetS, OffsetD; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) || @@ -3097,12 +3289,14 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, // This is the main test, which checks the offset values and the loop // increment value to determine if the accesses may be loop carried. - if (OffsetS >= OffsetD) - return OffsetS + AccessSizeS > DeltaS; - else - return OffsetD + AccessSizeD > DeltaD; + if (AccessSizeS == MemoryLocation::UnknownSize || + AccessSizeD == MemoryLocation::UnknownSize) + return true; - return true; + if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD) + return true; + + return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD); } void SwingSchedulerDAG::postprocessDAG() { @@ -3117,6 +3311,10 @@ void SwingSchedulerDAG::postprocessDAG() { /// the relative values of StartCycle and EndCycle. bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) { bool forward = true; + LLVM_DEBUG({ + dbgs() << "Trying to insert node between " << StartCycle << " and " + << EndCycle << " II: " << II << "\n"; + }); if (StartCycle > EndCycle) forward = false; @@ -3125,8 +3323,9 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) { for (int curCycle = StartCycle; curCycle != termCycle; forward ? ++curCycle : --curCycle) { - // Add the already scheduled instructions at the specified cycle to the DFA. - Resources->clearResources(); + // Add the already scheduled instructions at the specified cycle to the + // DFA. 
+ ProcItinResources.clearResources(); for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II); checkCycle <= LastCycle; checkCycle += II) { std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle]; @@ -3136,13 +3335,13 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) { I != E; ++I) { if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode())) continue; - assert(Resources->canReserveResources(*(*I)->getInstr()) && + assert(ProcItinResources.canReserveResources(*(*I)->getInstr()) && "These instructions have already been scheduled."); - Resources->reserveResources(*(*I)->getInstr()); + ProcItinResources.reserveResources(*(*I)->getInstr()); } } if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) || - Resources->canReserveResources(*SU->getInstr())) { + ProcItinResources.canReserveResources(*SU->getInstr())) { LLVM_DEBUG({ dbgs() << "\tinsert at cycle " << curCycle << " "; SU->getInstr()->dump(); @@ -3360,6 +3559,14 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, if (Pos < MoveUse) MoveUse = Pos; } + // We did not handle HW dependences in the previous for loop, + // and we normally set Latency = 0 for Anti deps, + // so we may have nodes in the same cycle that are Anti dependent on HW regs. + else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) { + OrderBeforeUse = true; + if ((MoveUse == 0) || (Pos < MoveUse)) + MoveUse = Pos; + } } for (auto &P : SU->Preds) { if (P.getSUnit() != *I) @@ -3523,9 +3730,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { for (SDep &PredEdge : SU->Preds) { SUnit *PredSU = PredEdge.getSUnit(); - unsigned PredIndex = - std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(), - std::make_pair(PredSU, 0), CompareKey)); + unsigned PredIndex = std::get<1>( + *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey)); if (!PredSU->getInstr()->isPHI() && PredIndex < Index) { PredBefore = true; Pred = PredSU; @@ -3535,9 +3741,13 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { for (SDep &SuccEdge : SU->Succs) { SUnit *SuccSU = SuccEdge.getSUnit(); - unsigned SuccIndex = - std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(), - std::make_pair(SuccSU, 0), CompareKey)); + // Do not process a boundary node; it was not included in NodeOrder, + // hence not in Indices either, and the call to llvm::lower_bound() below + // would return Indices.end(). + if (SuccSU->isBoundaryNode()) + continue; + unsigned SuccIndex = std::get<1>( + *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey)); if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) { SuccBefore = true; Succ = SuccSU; @@ -3548,9 +3758,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) { // instructions in circuits are allowed to be scheduled // after both a successor and predecessor.
- bool InCircuit = std::any_of( - Circuits.begin(), Circuits.end(), - [SU](const NodeSet &Circuit) { return Circuit.count(SU); }); + bool InCircuit = llvm::any_of( + Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); }); if (InCircuit) LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";); else { @@ -3740,5 +3949,140 @@ LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); } #endif +void ResourceManager::initProcResourceVectors( + const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) { + unsigned ProcResourceID = 0; + + // We currently limit the resource kinds to 64 and below so that we can use + // uint64_t for Masks + assert(SM.getNumProcResourceKinds() < 64 && + "Too many kinds of resources, unsupported"); + // Create a unique bitmask for every processor resource unit. + // Skip resource at index 0, since it always references 'InvalidUnit'. + Masks.resize(SM.getNumProcResourceKinds()); + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &Desc = *SM.getProcResource(I); + if (Desc.SubUnitsIdxBegin) + continue; + Masks[I] = 1ULL << ProcResourceID; + ProcResourceID++; + } + // Create a unique bitmask for every processor resource group. + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &Desc = *SM.getProcResource(I); + if (!Desc.SubUnitsIdxBegin) + continue; + Masks[I] = 1ULL << ProcResourceID; + for (unsigned U = 0; U < Desc.NumUnits; ++U) + Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]]; + ProcResourceID++; + } + LLVM_DEBUG({ + if (SwpShowResMask) { + dbgs() << "ProcResourceDesc:\n"; + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc *ProcResource = SM.getProcResource(I); + dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n", + ProcResource->Name, I, Masks[I], + ProcResource->NumUnits); + } + dbgs() << " -----------------\n"; + } + }); +} + +bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const { + + LLVM_DEBUG({ + if (SwpDebugResource) + dbgs() << "canReserveResources:\n"; + }); + if (UseDFA) + return DFAResources->canReserveResources(MID); + + unsigned InsnClass = MID->getSchedClass(); + const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass); + if (!SCDesc->isValid()) { + LLVM_DEBUG({ + dbgs() << "No valid Schedule Class Desc for schedClass!\n"; + dbgs() << "isPseudo:" << MID->isPseudo() << "\n"; + }); + return true; + } + + const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc); + const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc); + for (; I != E; ++I) { + if (!I->Cycles) + continue; + const MCProcResourceDesc *ProcResource = + SM.getProcResource(I->ProcResourceIdx); + unsigned NumUnits = ProcResource->NumUnits; + LLVM_DEBUG({ + if (SwpDebugResource) + dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n", + ProcResource->Name, I->ProcResourceIdx, + ProcResourceCount[I->ProcResourceIdx], NumUnits, + I->Cycles); + }); + if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits) + return false; + } + LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";); + return true; +} + +void ResourceManager::reserveResources(const MCInstrDesc *MID) { + LLVM_DEBUG({ + if (SwpDebugResource) + dbgs() << "reserveResources:\n"; + }); + if (UseDFA) + return DFAResources->reserveResources(MID); + unsigned InsnClass = MID->getSchedClass(); + const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass); + if (!SCDesc->isValid()) { + LLVM_DEBUG({ + dbgs() << "No valid 
Schedule Class Desc for schedClass!\n"; + dbgs() << "isPseudo:" << MID->isPseudo() << "\n"; + }); + return; + } + for (const MCWriteProcResEntry &PRE : + make_range(STI->getWriteProcResBegin(SCDesc), + STI->getWriteProcResEnd(SCDesc))) { + if (!PRE.Cycles) + continue; + ++ProcResourceCount[PRE.ProcResourceIdx]; + LLVM_DEBUG({ + if (SwpDebugResource) { + const MCProcResourceDesc *ProcResource = + SM.getProcResource(PRE.ProcResourceIdx); + dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n", + ProcResource->Name, PRE.ProcResourceIdx, + ProcResourceCount[PRE.ProcResourceIdx], + ProcResource->NumUnits, PRE.Cycles); + } + }); + } + LLVM_DEBUG({ + if (SwpDebugResource) + dbgs() << "reserveResources: done!\n\n"; + }); +} + +bool ResourceManager::canReserveResources(const MachineInstr &MI) const { + return canReserveResources(&MI.getDesc()); +} + +void ResourceManager::reserveResources(const MachineInstr &MI) { + return reserveResources(&MI.getDesc()); +} + +void ResourceManager::clearResources() { + if (UseDFA) + return DFAResources->clearResources(); + std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0); +} diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp index 488377998cb3..7f220ed1fd8f 100644 --- a/lib/CodeGen/MachinePostDominators.cpp +++ b/lib/CodeGen/MachinePostDominators.cpp @@ -1,9 +1,8 @@ //===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp index 2619d8f78276..2961d456be0d 100644 --- a/lib/CodeGen/MachineRegionInfo.cpp +++ b/lib/CodeGen/MachineRegionInfo.cpp @@ -1,9 +1,8 @@ //===- lib/Codegen/MachineRegionInfo.cpp ----------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 6e5ca45d5e5e..f0fd0405d69d 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -1,9 +1,8 @@ //===- lib/Codegen/MachineRegisterInfo.cpp --------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -155,7 +154,7 @@ unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) { /// createVirtualRegister - Create and return a new virtual register in the /// function with the specified register class. /// -unsigned +Register MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name) { assert(RegClass && "Cannot create register without RegClass!"); @@ -170,7 +169,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass, return Reg; } -unsigned MachineRegisterInfo::cloneVirtualRegister(unsigned VReg, +Register MachineRegisterInfo::cloneVirtualRegister(Register VReg, StringRef Name) { unsigned Reg = createIncompleteVirtualRegister(Name); VRegInfo[Reg].first = VRegInfo[VReg].first; @@ -185,7 +184,7 @@ void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) { VRegToType[VReg] = Ty; } -unsigned +Register MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) { // New virtual register number. unsigned Reg = createIncompleteVirtualRegister(Name); @@ -424,6 +423,13 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { return ++UI == use_nodbg_end(); } +bool MachineRegisterInfo::hasOneNonDBGUser(unsigned RegNo) const { + use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo); + if (UI == use_instr_nodbg_end()) + return false; + return ++UI == use_instr_nodbg_end(); +} + /// clearKillFlags - Iterate over all the uses of the given register and /// clear the kill flag from the MachineOperand. This function is used by /// optimization passes which extend register lifetimes and need only diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 542491eabbf2..e8b42047b49f 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -1,9 +1,8 @@ //===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 90dad9d399fe..ae1170ad1be6 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1,9 +1,8 @@ //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -487,13 +486,17 @@ getSchedRegions(MachineBasicBlock *MBB, MachineInstr &MI = *std::prev(I); if (isSchedBoundary(&MI, &*MBB, MF, TII)) break; - if (!MI.isDebugInstr()) + if (!MI.isDebugInstr()) { // MBB::size() uses instr_iterator to count. 
Here we need a bundle to // count as a single instruction. ++NumRegionInstrs; + } } - Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs)); + // It's possible we found a scheduling region that only has debug + // instructions. Don't bother scheduling these. + if (NumRegionInstrs != 0) + Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs)); } if (RegionsTopDown) @@ -605,23 +608,6 @@ LLVM_DUMP_METHOD void ReadyQueue::dump() const { // Provide a vtable anchor. ScheduleDAGMI::~ScheduleDAGMI() = default; -bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { - return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU); -} - -bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) { - if (SuccSU != &ExitSU) { - // Do not use WillCreateCycle, it assumes SD scheduling. - // If Pred is reachable from Succ, then the edge creates a cycle. - if (Topo.IsReachable(PredDep.getSUnit(), SuccSU)) - return false; - Topo.AddPred(SuccSU, PredDep.getSUnit()); - } - SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial()); - // Return true regardless of whether a new edge needed to be inserted. - return true; -} - /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When /// NumPredsLeft reaches zero, release the successor node. /// @@ -762,8 +748,6 @@ void ScheduleDAGMI::schedule() { // Build the DAG. buildSchedGraph(AA); - Topo.InitDAGTopologicalSorting(); - postprocessDAG(); SmallVector<SUnit*, 8> TopRoots, BotRoots; @@ -1212,8 +1196,6 @@ void ScheduleDAGMILive::schedule() { LLVM_DEBUG(SchedImpl->dumpPolicy()); buildDAGWithRegPressure(); - Topo.InitDAGTopologicalSorting(); - postprocessDAG(); SmallVector<SUnit*, 8> TopRoots, BotRoots; @@ -1484,10 +1466,10 @@ namespace { class BaseMemOpClusterMutation : public ScheduleDAGMutation { struct MemOpInfo { SUnit *SU; - MachineOperand *BaseOp; + const MachineOperand *BaseOp; int64_t Offset; - MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs) + MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs) : SU(su), BaseOp(Op), Offset(ofs) {} bool operator<(const MemOpInfo &RHS) const { @@ -1533,7 +1515,7 @@ public: void apply(ScheduleDAGInstrs *DAGInstrs) override; protected: - void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG); + void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG); }; class StoreClusterMutation : public BaseMemOpClusterMutation { @@ -1570,10 +1552,10 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII, } // end namespace llvm void BaseMemOpClusterMutation::clusterNeighboringMemOps( - ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) { + ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) { SmallVector<MemOpInfo, 32> MemOpRecords; for (SUnit *SU : MemOps) { - MachineOperand *BaseOp; + const MachineOperand *BaseOp; int64_t Offset; if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI)) MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset)); @@ -1610,9 +1592,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( } /// Callback from DAG postProcessing to create cluster edges for loads. -void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) { - ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); - +void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { // Map DAG NodeNum to store chain ID. DenseMap<unsigned, unsigned> StoreChainIDs; // Map each store chain to a set of dependent MemOps. 
@@ -1857,9 +1837,15 @@ SchedBoundary::~SchedBoundary() { delete HazardRec; } /// Given a Count of resource usage and a Latency value, return true if a /// SchedBoundary becomes resource limited. +/// If we are checking after scheduling a node, we should return true when +/// we just reach the resource limit. static bool checkResourceLimit(unsigned LFactor, unsigned Count, - unsigned Latency) { - return (int)(Count - (Latency * LFactor)) > (int)LFactor; + unsigned Latency, bool AfterSchedNode) { + int ResCntFactor = (int)(Count - (Latency * LFactor)); + if (AfterSchedNode) + return ResCntFactor >= (int)LFactor; + else + return ResCntFactor > (int)LFactor; } void SchedBoundary::reset() { @@ -1883,6 +1869,7 @@ void SchedBoundary::reset() { ZoneCritResIdx = 0; IsResourceLimited = false; ReservedCycles.clear(); + ReservedCyclesIndex.clear(); #ifndef NDEBUG // Track the maximum number of stall cycles that could arise either from the // latency of a DAG edge or the number of cycles that a processor resource is @@ -1921,8 +1908,17 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { SchedModel = smodel; Rem = rem; if (SchedModel->hasInstrSchedModel()) { - ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); - ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle); + unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); + ReservedCyclesIndex.resize(ResourceCount); + ExecutedResCounts.resize(ResourceCount); + unsigned NumUnits = 0; + + for (unsigned i = 0; i < ResourceCount; ++i) { + ReservedCyclesIndex[i] = NumUnits; + NumUnits += SchedModel->getProcResource(i)->NumUnits; + } + + ReservedCycles.resize(NumUnits, InvalidCycle); } } @@ -1943,11 +1939,11 @@ unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { return 0; } -/// Compute the next cycle at which the given processor resource can be -/// scheduled. -unsigned SchedBoundary:: -getNextResourceCycle(unsigned PIdx, unsigned Cycles) { - unsigned NextUnreserved = ReservedCycles[PIdx]; +/// Compute the next cycle at which the given processor resource unit +/// can be scheduled. +unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx, + unsigned Cycles) { + unsigned NextUnreserved = ReservedCycles[InstanceIdx]; // If this resource has never been used, always return cycle zero. if (NextUnreserved == InvalidCycle) return 0; @@ -1957,6 +1953,29 @@ getNextResourceCycle(unsigned PIdx, unsigned Cycles) { return NextUnreserved; } +/// Compute the next cycle at which the given processor resource can be +/// scheduled. Returns the next cycle and the index of the processor resource +/// instance in the reserved cycles vector. +std::pair<unsigned, unsigned> +SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) { + unsigned MinNextUnreserved = InvalidCycle; + unsigned InstanceIdx = 0; + unsigned StartIndex = ReservedCyclesIndex[PIdx]; + unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits; + assert(NumberOfInstances > 0 && + "Cannot have zero instances of a ProcResource"); + + for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; + ++I) { + unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); + if (MinNextUnreserved > NextUnreserved) { + InstanceIdx = I; + MinNextUnreserved = NextUnreserved; + } + } + return std::make_pair(MinNextUnreserved, InstanceIdx); +} + /// Does this SU have a hazard within the current instruction group. /// /// The scheduler supports two modes of hazard recognition. 
The first is the @@ -1998,14 +2017,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) { SchedModel->getWriteProcResEnd(SC))) { unsigned ResIdx = PE.ProcResourceIdx; unsigned Cycles = PE.Cycles; - unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles); + unsigned NRCycle, InstanceIdx; + std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles); if (NRCycle > CurrCycle) { #ifndef NDEBUG MaxObservedStall = std::max(Cycles, MaxObservedStall); #endif LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " - << SchedModel->getResourceName(ResIdx) << "=" - << NRCycle << "c\n"); + << SchedModel->getResourceName(ResIdx) + << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']' + << "=" << NRCycle << "c\n"); return true; } } @@ -2119,7 +2140,7 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) { CheckPending = true; IsResourceLimited = checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), - getScheduledLatency()); + getScheduledLatency(), true); LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); @@ -2160,10 +2181,12 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { << "c\n"); } // For reserved resources, record the highest cycle using the resource. - unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); + unsigned NextAvailable, InstanceIdx; + std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles); if (NextAvailable > CurrCycle) { LLVM_DEBUG(dbgs() << " Resource conflict: " - << SchedModel->getProcResource(PIdx)->Name + << SchedModel->getResourceName(PIdx) + << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']' << " reserved until @" << NextAvailable << "\n"); } return NextAvailable; @@ -2179,6 +2202,8 @@ void SchedBoundary::bumpNode(SUnit *SU) { HazardRec->Reset(); } HazardRec->EmitInstruction(SU); + // Scheduling an instruction may have made pending instructions available. + CheckPending = true; } // checkHazard should prevent scheduling multiple instructions per cycle that // exceed the issue width. @@ -2251,12 +2276,13 @@ void SchedBoundary::bumpNode(SUnit *SU) { PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned PIdx = PI->ProcResourceIdx; if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { + unsigned ReservedUntil, InstanceIdx; + std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0); if (isTop()) { - ReservedCycles[PIdx] = - std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles); - } - else - ReservedCycles[PIdx] = NextCycle; + ReservedCycles[InstanceIdx] = + std::max(ReservedUntil, NextCycle + PI->Cycles); + } else + ReservedCycles[InstanceIdx] = NextCycle; } } } @@ -2282,7 +2308,7 @@ void SchedBoundary::bumpNode(SUnit *SU) { // resource limited. If a stall occurred, bumpCycle does this. IsResourceLimited = checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), - getScheduledLatency()); + getScheduledLatency(), true); // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle // resets CurrMOps. Loop to handle instructions with more MOps than issue in @@ -2501,7 +2527,7 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, RemLatency = computeRemLatency(CurrZone); RemLatencyComputed = true; OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(), - OtherCount, RemLatency); + OtherCount, RemLatency, false); } // Schedule aggressively for latency in PostRA mode. 
We don't check for @@ -2741,8 +2767,10 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs); // After subtarget overrides, apply command line options. - if (!EnableRegPressure) + if (!EnableRegPressure) { RegionPolicy.ShouldTrackPressure = false; + RegionPolicy.ShouldTrackLaneMasks = false; + } // Check -misched-topdown/bottomup can force or unforce scheduling direction. // e.g. -misched-bottomup=false allows scheduling in both directions. diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index cdc597db6401..41db2c88ce50 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -1,9 +1,8 @@ //===- MachineSink.cpp - Sinking for machine instructions -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -585,9 +584,8 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccs.push_back(DTChild->getBlock()); // Sort Successors according to their loop depth or block frequency info. - std::stable_sort( - AllSuccs.begin(), AllSuccs.end(), - [this](const MachineBasicBlock *L, const MachineBasicBlock *R) { + llvm::stable_sort( + AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) { uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0; uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; @@ -716,7 +714,7 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit)) return false; - MachineOperand *BaseOp; + const MachineOperand *BaseOp; int64_t Offset; if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) return false; @@ -1203,6 +1201,9 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, } bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + bool Changed = false; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index e62ed3094651..f9505df4e7f4 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -1,9 +1,8 @@ //===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 534d3699db29..0ad792ac62cf 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -1,9 +1,8 @@ //===- MachineVerifier.cpp - Machine Code Verifier ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -219,7 +218,7 @@ namespace { bool isAllocatable(unsigned Reg) const { return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) && - !regsReserved.test(Reg); + !regsReserved.test(Reg); } // Analysis information if available @@ -231,6 +230,9 @@ namespace { void visitMachineFunctionBefore(); void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); void visitMachineBundleBefore(const MachineInstr *MI); + + bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI); + void verifyPreISelGenericInstruction(const MachineInstr *MI); void visitMachineInstrBefore(const MachineInstr *MI); void visitMachineOperand(const MachineOperand *MO, unsigned MONum); void visitMachineInstrAfter(const MachineInstr *MI); @@ -838,7 +840,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { if (MI->isTerminator() && !TII->isPredicated(*MI)) { if (!FirstTerminator) FirstTerminator = MI; - } else if (FirstTerminator) { + } else if (FirstTerminator && !MI->isDebugEntryValue()) { report("Non-terminator instruction after the first terminator", MI); errs() << "First terminator was:\t" << *FirstTerminator; } @@ -889,109 +891,150 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) { } } -void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { - const MCInstrDesc &MCID = MI->getDesc(); - if (MI->getNumOperands() < MCID.getNumOperands()) { - report("Too few operands", MI); - errs() << MCID.getNumOperands() << " operands expected, but " - << MI->getNumOperands() << " given.\n"; +/// Check that types are consistent when two operands need to have the same +/// number of vector elements. +/// \return true if the types are valid. +bool MachineVerifier::verifyVectorElementMatch(LLT Ty0, LLT Ty1, + const MachineInstr *MI) { + if (Ty0.isVector() != Ty1.isVector()) { + report("operand types must be all-vector or all-scalar", MI); + // Generally we try to report as many issues as possible at once, but in + // this case it's not clear what we should be comparing the size of the + // scalar with: the size of the whole vector or its lane. Instead of + // making an arbitrary choice and emitting a not-so-helpful message, let's + // avoid the extra noise and stop here.
+ return false; } - if (MI->isPHI()) { - if (MF->getProperties().hasProperty( - MachineFunctionProperties::Property::NoPHIs)) - report("Found PHI instruction with NoPHIs property set", MI); + if (Ty0.isVector() && Ty0.getNumElements() != Ty1.getNumElements()) { + report("operand types must preserve number of vector elements", MI); + return false; + } - if (FirstNonPHI) - report("Found PHI instruction after non-PHI", MI); - } else if (FirstNonPHI == nullptr) - FirstNonPHI = MI; + return true; +} - // Check the tied operands. - if (MI->isInlineAsm()) - verifyInlineAsm(MI); +void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { + if (isFunctionSelected) + report("Unexpected generic instruction in a Selected function", MI); - // Check the MachineMemOperands for basic consistency. - for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), - E = MI->memoperands_end(); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned NumOps = MI->getNumOperands(); + + // Check types. + SmallVector<LLT, 4> Types; + for (unsigned I = 0, E = std::min(MCID.getNumOperands(), NumOps); I != E; ++I) { - if ((*I)->isLoad() && !MI->mayLoad()) - report("Missing mayLoad flag", MI); - if ((*I)->isStore() && !MI->mayStore()) - report("Missing mayStore flag", MI); - } + if (!MCID.OpInfo[I].isGenericType()) + continue; + // Generic instructions specify type equality constraints between some of + // their operands. Make sure these are consistent. + size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex(); + Types.resize(std::max(TypeIdx + 1, Types.size())); + + const MachineOperand *MO = &MI->getOperand(I); + if (!MO->isReg()) { + report("generic instruction must use register operands", MI); + continue; + } - // Debug values must not have a slot index. - // Other instructions must have one, unless they are inside a bundle. - if (LiveInts) { - bool mapped = !LiveInts->isNotInMIMap(*MI); - if (MI->isDebugInstr()) { - if (mapped) - report("Debug instruction has a slot index", MI); - } else if (MI->isInsideBundle()) { - if (mapped) - report("Instruction inside bundle has a slot index", MI); + LLT OpTy = MRI->getType(MO->getReg()); + // Don't report a type mismatch if there is no actual mismatch, only a + // type missing, to reduce noise: + if (OpTy.isValid()) { + // Only the first valid type for a type index will be printed: don't + // overwrite it later so it's always clear which type was expected: + if (!Types[TypeIdx].isValid()) + Types[TypeIdx] = OpTy; + else if (Types[TypeIdx] != OpTy) + report("Type mismatch in generic instruction", MO, I, OpTy); } else { - if (!mapped) - report("Missing slot index", MI); + // Generic instructions must have types attached to their operands. + report("Generic instruction is missing a virtual register type", MO, I); } } - if (isPreISelGenericOpcode(MCID.getOpcode())) { - if (isFunctionSelected) - report("Unexpected generic instruction in a Selected function", MI); - - // Check types. - SmallVector<LLT, 4> Types; - for (unsigned I = 0; I < MCID.getNumOperands(); ++I) { - if (!MCID.OpInfo[I].isGenericType()) - continue; - // Generic instructions specify type equality constraints between some of - // their operands. Make sure these are consistent. 
- size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex(); - Types.resize(std::max(TypeIdx + 1, Types.size())); - - const MachineOperand *MO = &MI->getOperand(I); - LLT OpTy = MRI->getType(MO->getReg()); - // Don't report a type mismatch if there is no actual mismatch, only a - // type missing, to reduce noise: - if (OpTy.isValid()) { - // Only the first valid type for a type index will be printed: don't - // overwrite it later so it's always clear which type was expected: - if (!Types[TypeIdx].isValid()) - Types[TypeIdx] = OpTy; - else if (Types[TypeIdx] != OpTy) - report("Type mismatch in generic instruction", MO, I, OpTy); - } else { - // Generic instructions must have types attached to their operands. - report("Generic instruction is missing a virtual register type", MO, I); - } - } - - // Generic opcodes must not have physical register operands. - for (unsigned I = 0; I < MI->getNumOperands(); ++I) { - const MachineOperand *MO = &MI->getOperand(I); - if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg())) - report("Generic instruction cannot have physical register", MO, I); - } + // Generic opcodes must not have physical register operands. + for (unsigned I = 0; I < MI->getNumOperands(); ++I) { + const MachineOperand *MO = &MI->getOperand(I); + if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg())) + report("Generic instruction cannot have physical register", MO, I); } + // Avoid out of bounds in checks below. This was already reported earlier. + if (MI->getNumOperands() < MCID.getNumOperands()) + return; + StringRef ErrorInfo; if (!TII->verifyInstruction(*MI, ErrorInfo)) report(ErrorInfo.data(), MI); // Verify properties of various specific instruction types - switch(MI->getOpcode()) { - default: + switch (MI->getOpcode()) { + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_FCONSTANT: { + if (MI->getNumOperands() < MCID.getNumOperands()) + break; + + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + if (DstTy.isVector()) + report("Instruction cannot use a vector result type", MI); + + if (MI->getOpcode() == TargetOpcode::G_CONSTANT) { + if (!MI->getOperand(1).isCImm()) { + report("G_CONSTANT operand must be cimm", MI); + break; + } + + const ConstantInt *CI = MI->getOperand(1).getCImm(); + if (CI->getBitWidth() != DstTy.getSizeInBits()) + report("inconsistent constant size", MI); + } else { + if (!MI->getOperand(1).isFPImm()) { + report("G_FCONSTANT operand must be fpimm", MI); + break; + } + const ConstantFP *CF = MI->getOperand(1).getFPImm(); + + if (APFloat::getSizeInBits(CF->getValueAPF().getSemantics()) != + DstTy.getSizeInBits()) { + report("inconsistent constant size", MI); + } + } + break; + } case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: + case TargetOpcode::G_ZEXTLOAD: + case TargetOpcode::G_SEXTLOAD: { + LLT ValTy = MRI->getType(MI->getOperand(0).getReg()); + LLT PtrTy = MRI->getType(MI->getOperand(1).getReg()); + if (!PtrTy.isPointer()) + report("Generic memory instruction must access a pointer", MI); + // Generic loads and stores must have a single MachineMemOperand // describing that access. 
- if (!MI->hasOneMemOperand()) + if (!MI->hasOneMemOperand()) { report("Generic instruction accessing memory must have one mem operand", MI); + } else { + const MachineMemOperand &MMO = **MI->memoperands_begin(); + if (MI->getOpcode() == TargetOpcode::G_ZEXTLOAD || + MI->getOpcode() == TargetOpcode::G_SEXTLOAD) { + if (MMO.getSizeInBits() >= ValTy.getSizeInBits()) + report("Generic extload must have a narrower memory type", MI); + } else if (MI->getOpcode() == TargetOpcode::G_LOAD) { + if (MMO.getSize() > ValTy.getSizeInBytes()) + report("load memory size cannot exceed result size", MI); + } else if (MI->getOpcode() == TargetOpcode::G_STORE) { + if (ValTy.getSizeInBytes() < MMO.getSize()) + report("store memory size cannot exceed value size", MI); + } + } + break; + } case TargetOpcode::G_PHI: { LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); if (!DstTy.isValid() || @@ -1009,6 +1052,70 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { MI); break; } + case TargetOpcode::G_BITCAST: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + if (!DstTy.isValid() || !SrcTy.isValid()) + break; + + if (SrcTy.isPointer() != DstTy.isPointer()) + report("bitcast cannot convert between pointers and other types", MI); + + if (SrcTy.getSizeInBits() != DstTy.getSizeInBits()) + report("bitcast sizes must match", MI); + break; + } + case TargetOpcode::G_INTTOPTR: + case TargetOpcode::G_PTRTOINT: + case TargetOpcode::G_ADDRSPACE_CAST: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + if (!DstTy.isValid() || !SrcTy.isValid()) + break; + + verifyVectorElementMatch(DstTy, SrcTy, MI); + + DstTy = DstTy.getScalarType(); + SrcTy = SrcTy.getScalarType(); + + if (MI->getOpcode() == TargetOpcode::G_INTTOPTR) { + if (!DstTy.isPointer()) + report("inttoptr result type must be a pointer", MI); + if (SrcTy.isPointer()) + report("inttoptr source type must not be a pointer", MI); + } else if (MI->getOpcode() == TargetOpcode::G_PTRTOINT) { + if (!SrcTy.isPointer()) + report("ptrtoint source type must be a pointer", MI); + if (DstTy.isPointer()) + report("ptrtoint result type must not be a pointer", MI); + } else { + assert(MI->getOpcode() == TargetOpcode::G_ADDRSPACE_CAST); + if (!SrcTy.isPointer() || !DstTy.isPointer()) + report("addrspacecast types must be pointers", MI); + else { + if (SrcTy.getAddressSpace() == DstTy.getAddressSpace()) + report("addrspacecast must convert different address spaces", MI); + } + } + + break; + } + case TargetOpcode::G_GEP: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT PtrTy = MRI->getType(MI->getOperand(1).getReg()); + LLT OffsetTy = MRI->getType(MI->getOperand(2).getReg()); + if (!DstTy.isValid() || !PtrTy.isValid() || !OffsetTy.isValid()) + break; + + if (!PtrTy.getScalarType().isPointer()) + report("gep first operand must be a pointer", MI); + + if (OffsetTy.getScalarType().isPointer()) + report("gep offset operand must not be a pointer", MI); + + // TODO: Is the offset allowed to be a scalar with a vector? 
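    // [Editor's sketch, not part of this diff: under the checks above,
    // hypothetical MIR like
    //   %sum:_(p0) = G_GEP %base:_(p0), %idx:_(s64)
    // verifies cleanly; passing the s64 value as the first operand trips
    // 'gep first operand must be a pointer', and a p0-typed offset trips
    // 'gep offset operand must not be a pointer'.]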
+ break; + } case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: @@ -1021,30 +1128,18 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // instructions aren't guaranteed to have the right number of operands or // types attached to them at this point assert(MCID.getNumOperands() == 2 && "Expected 2 operands G_*{EXT,TRUNC}"); - if (MI->getNumOperands() < MCID.getNumOperands()) - break; LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); if (!DstTy.isValid() || !SrcTy.isValid()) break; - LLT DstElTy = DstTy.isVector() ? DstTy.getElementType() : DstTy; - LLT SrcElTy = SrcTy.isVector() ? SrcTy.getElementType() : SrcTy; + LLT DstElTy = DstTy.getScalarType(); + LLT SrcElTy = SrcTy.getScalarType(); if (DstElTy.isPointer() || SrcElTy.isPointer()) report("Generic extend/truncate can not operate on pointers", MI); - if (DstTy.isVector() != SrcTy.isVector()) { - report("Generic extend/truncate must be all-vector or all-scalar", MI); - // Generally we try to report as many issues as possible at once, but in - // this case it's not clear what should we be comparing the size of the - // scalar with: the size of the whole vector or its lane. Instead of - // making an arbitrary choice and emitting not so helpful message, let's - // avoid the extra noise and stop here. - break; - } - if (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements()) - report("Generic vector extend/truncate must preserve number of lanes", - MI); + verifyVectorElementMatch(DstTy, SrcTy, MI); + unsigned DstSize = DstElTy.getSizeInBits(); unsigned SrcSize = SrcElTy.getSizeInBits(); switch (MI->getOpcode()) { @@ -1061,6 +1156,17 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } break; } + case TargetOpcode::G_SELECT: { + LLT SelTy = MRI->getType(MI->getOperand(0).getReg()); + LLT CondTy = MRI->getType(MI->getOperand(1).getReg()); + if (!SelTy.isValid() || !CondTy.isValid()) + break; + + // Scalar condition select on a vector is valid. + if (CondTy.isVector()) + verifyVectorElementMatch(SelTy, CondTy, MI); + break; + } case TargetOpcode::G_MERGE_VALUES: { // G_MERGE_VALUES should only be used to merge scalars into a larger scalar, // e.g. s2N = MERGE sN, sN @@ -1070,6 +1176,16 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); if (DstTy.isVector() || SrcTy.isVector()) report("G_MERGE_VALUES cannot operate on vectors", MI); + + const unsigned NumOps = MI->getNumOperands(); + if (DstTy.getSizeInBits() != SrcTy.getSizeInBits() * (NumOps - 1)) + report("G_MERGE_VALUES result size is inconsistent", MI); + + for (unsigned I = 2; I != NumOps; ++I) { + if (MRI->getType(MI->getOperand(I).getReg()) != SrcTy) + report("G_MERGE_VALUES source types do not match", MI); + } + break; } case TargetOpcode::G_UNMERGE_VALUES: { @@ -1092,18 +1208,23 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // must match the dest vector size. 
LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg()); - if (!DstTy.isVector() || SrcEltTy.isVector()) + if (!DstTy.isVector() || SrcEltTy.isVector()) { report("G_BUILD_VECTOR must produce a vector from scalar operands", MI); + break; + } + + if (DstTy.getElementType() != SrcEltTy) + report("G_BUILD_VECTOR result element type must match source type", MI); + + if (DstTy.getNumElements() != MI->getNumOperands() - 1) + report("G_BUILD_VECTOR must have an operand for each element", MI); + for (unsigned i = 2; i < MI->getNumOperands(); ++i) { if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MI->getOperand(i).getReg())) report("G_BUILD_VECTOR source operand types are not homogeneous", MI); } - if (DstTy.getSizeInBits() != - SrcEltTy.getSizeInBits() * (MI->getNumOperands() - 1)) - report("G_BUILD_VECTOR src operands total size don't match dest " - "size.", - MI); + break; } case TargetOpcode::G_BUILD_VECTOR_TRUNC: { @@ -1144,6 +1265,176 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { report("G_CONCAT_VECTOR num dest and source elements should match", MI); break; } + case TargetOpcode::G_ICMP: + case TargetOpcode::G_FCMP: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(2).getReg()); + + if ((DstTy.isVector() != SrcTy.isVector()) || + (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements())) + report("Generic vector icmp/fcmp must preserve number of lanes", MI); + + break; + } + case TargetOpcode::G_EXTRACT: { + const MachineOperand &SrcOp = MI->getOperand(1); + if (!SrcOp.isReg()) { + report("extract source must be a register", MI); + break; + } + + const MachineOperand &OffsetOp = MI->getOperand(2); + if (!OffsetOp.isImm()) { + report("extract offset must be a constant", MI); + break; + } + + unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits(); + unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits(); + if (SrcSize == DstSize) + report("extract source must be larger than result", MI); + + if (DstSize + OffsetOp.getImm() > SrcSize) + report("extract reads past end of register", MI); + break; + } + case TargetOpcode::G_INSERT: { + const MachineOperand &SrcOp = MI->getOperand(2); + if (!SrcOp.isReg()) { + report("insert source must be a register", MI); + break; + } + + const MachineOperand &OffsetOp = MI->getOperand(3); + if (!OffsetOp.isImm()) { + report("insert offset must be a constant", MI); + break; + } + + unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits(); + unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits(); + + if (DstSize <= SrcSize) + report("inserted size must be smaller than total register", MI); + + if (SrcSize + OffsetOp.getImm() > DstSize) + report("insert writes past end of register", MI); + + break; + } + case TargetOpcode::G_JUMP_TABLE: { + if (!MI->getOperand(1).isJTI()) + report("G_JUMP_TABLE source operand must be a jump table index", MI); + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + if (!DstTy.isPointer()) + report("G_JUMP_TABLE dest operand must have a pointer type", MI); + break; + } + case TargetOpcode::G_BRJT: { + if (!MRI->getType(MI->getOperand(0).getReg()).isPointer()) + report("G_BRJT src operand 0 must be a pointer type", MI); + + if (!MI->getOperand(1).isJTI()) + report("G_BRJT src operand 1 must be a jump table index", MI); + + const auto &IdxOp = MI->getOperand(2); + if (!IdxOp.isReg() ||
MRI->getType(IdxOp.getReg()).isPointer()) + report("G_BRJT src operand 2 must be a scalar reg type", MI); + break; + } + case TargetOpcode::G_INTRINSIC: + case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: { + // TODO: Should verify number of def and use operands, but the current + // interface requires passing in IR types for mangling. + const MachineOperand &IntrIDOp = MI->getOperand(MI->getNumExplicitDefs()); + if (!IntrIDOp.isIntrinsicID()) { + report("G_INTRINSIC first src operand must be an intrinsic ID", MI); + break; + } + + bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC; + unsigned IntrID = IntrIDOp.getIntrinsicID(); + if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) { + AttributeList Attrs + = Intrinsic::getAttributes(MF->getFunction().getContext(), + static_cast<Intrinsic::ID>(IntrID)); + bool DeclHasSideEffects = !Attrs.hasFnAttribute(Attribute::ReadNone); + if (NoSideEffects && DeclHasSideEffects) { + report("G_INTRINSIC used with intrinsic that accesses memory", MI); + break; + } + if (!NoSideEffects && !DeclHasSideEffects) { + report("G_INTRINSIC_W_SIDE_EFFECTS used with readnone intrinsic", MI); + break; + } + } + + break; + } + default: + break; + } +} + +void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MI->getNumOperands() < MCID.getNumOperands()) { + report("Too few operands", MI); + errs() << MCID.getNumOperands() << " operands expected, but " + << MI->getNumOperands() << " given.\n"; + } + + if (MI->isPHI()) { + if (MF->getProperties().hasProperty( + MachineFunctionProperties::Property::NoPHIs)) + report("Found PHI instruction with NoPHIs property set", MI); + + if (FirstNonPHI) + report("Found PHI instruction after non-PHI", MI); + } else if (FirstNonPHI == nullptr) + FirstNonPHI = MI; + + // Check the tied operands. + if (MI->isInlineAsm()) + verifyInlineAsm(MI); + + // Check the MachineMemOperands for basic consistency. + for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); + I != E; ++I) { + if ((*I)->isLoad() && !MI->mayLoad()) + report("Missing mayLoad flag", MI); + if ((*I)->isStore() && !MI->mayStore()) + report("Missing mayStore flag", MI); + } + + // Debug values must not have a slot index. + // Other instructions must have one, unless they are inside a bundle. 
+ if (LiveInts) { + bool mapped = !LiveInts->isNotInMIMap(*MI); + if (MI->isDebugInstr()) { + if (mapped) + report("Debug instruction has a slot index", MI); + } else if (MI->isInsideBundle()) { + if (mapped) + report("Instruction inside bundle has a slot index", MI); + } else { + if (!mapped) + report("Missing slot index", MI); + } + } + + if (isPreISelGenericOpcode(MCID.getOpcode())) { + verifyPreISelGenericInstruction(MI); + return; + } + + StringRef ErrorInfo; + if (!TII->verifyInstruction(*MI, ErrorInfo)) + report(ErrorInfo.data(), MI); + + // Verify properties of various specific instruction types + switch (MI->getOpcode()) { case TargetOpcode::COPY: { if (foundErrors) break; @@ -1193,7 +1484,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset); // TODO: verify we have properly encoded deopt arguments - }; + break; + } } void @@ -1356,7 +1648,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { return; } if (SubIdx) { - report("Generic virtual register does not subregister index", MO, + report("Generic virtual register does not allow subregister index", MO, MONum); return; } @@ -1911,6 +2203,10 @@ void MachineVerifier::visitMachineFunctionAfter() { verifyLiveVariables(); if (LiveInts) verifyLiveIntervals(); + + for (auto CSInfo : MF->getCallSitesInfo()) + if (!CSInfo.first->isCall()) + report("Call site info referencing instruction that is not call", MF); } void MachineVerifier::verifyLiveVariables() { diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 82b6d642c73b..2db1e86905a4 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -1,9 +1,8 @@ //===- MacroFusion.cpp - Macro Fusion -------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -37,7 +36,7 @@ static bool isHazard(const SDep &Dep) { return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output; } -static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, +static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU) { // Check that neither instr is already paired with another along the edge // between them. @@ -49,7 +48,7 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, if (SI.isCluster()) return false; // Though the reachability checks above could be made more generic, - // perhaps as part of ScheduleDAGMI::addEdge(), since such edges are valid, + // perhaps as part of ScheduleDAGInstrs::addEdge(), since such edges are valid, // the extra computation cost makes it less interesting in general cases. // Create a single weak edge between the adjacent instrs. 
The only effect is @@ -118,7 +117,7 @@ namespace { class MacroFusion : public ScheduleDAGMutation { ShouldSchedulePredTy shouldScheduleAdjacent; bool FuseBlock; - bool scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU); + bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU); public: MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock) @@ -129,9 +128,7 @@ public: } // end anonymous namespace -void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { - ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); - +void MacroFusion::apply(ScheduleDAGInstrs *DAG) { if (FuseBlock) // For each of the SUnits in the scheduling block, try to fuse the instr in // it with one in its predecessors. @@ -145,7 +142,7 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { /// Implement the fusion of instr pairs in the scheduling DAG, /// anchored at the instr in AnchorSU. -bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) { +bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) { const MachineInstr &AnchorMI = *AnchorSU.getInstr(); const TargetInstrInfo &TII = *DAG.TII; const TargetSubtargetInfo &ST = DAG.MF.getSubtarget(); diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 770f6c5b0403..c70b62252139 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -1,9 +1,8 @@ //===- OptimizePHIs.cpp - Optimize machine instruction PHIs ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -182,11 +181,12 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg))) continue; - // for the case SingleValReg taken from copy instr - MRI->clearKillFlags(SingleValReg); - MRI->replaceRegWith(OldReg, SingleValReg); MI->eraseFromParent(); + + // The kill flags on OldReg and SingleValReg may no longer be correct. + MRI->clearKillFlags(SingleValReg); + ++NumPHICycles; Changed = true; continue; diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index b9801c6fd97b..948a5835438c 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -1,9 +1,8 @@ //===- PhiElimination.cpp - Eliminate PHI nodes by inserting copies -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp index 4e67ff2e5088..3a2cdaf3bd3c 100644 --- a/lib/CodeGen/PHIEliminationUtils.cpp +++ b/lib/CodeGen/PHIEliminationUtils.cpp @@ -1,9 +1,8 @@ //===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h index b997d7ac5f4f..0ff3a41f47d3 100644 --- a/lib/CodeGen/PHIEliminationUtils.h +++ b/lib/CodeGen/PHIEliminationUtils.h @@ -1,9 +1,8 @@ //=- PHIEliminationUtils.h - Helper functions for PHI elimination -*- C++ -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/ParallelCG.cpp b/lib/CodeGen/ParallelCG.cpp index bc3f2a6e9b5a..e4c73658cb4f 100644 --- a/lib/CodeGen/ParallelCG.cpp +++ b/lib/CodeGen/ParallelCG.cpp @@ -1,9 +1,8 @@ //===-- ParallelCG.cpp ----------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp index afb4b0a7e174..a3fa1b0ad8ed 100644 --- a/lib/CodeGen/PatchableFunction.cpp +++ b/lib/CodeGen/PatchableFunction.cpp @@ -1,9 +1,8 @@ //===-- PatchableFunction.cpp - Patchable prologues for LLVM -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 1d058ccfb633..b918396aa8c5 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -1,9 +1,8 @@ //===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -1307,7 +1306,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( /// Check whether MI is a candidate for folding into a later instruction. /// We only fold loads to virtual registers and the virtual register defined -/// has a single use. +/// has a single user. bool PeepholeOptimizer::isLoadFoldable( MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { if (!MI.canFoldAsLoad() || !MI.mayLoad()) @@ -1317,12 +1316,12 @@ bool PeepholeOptimizer::isLoadFoldable( return false; unsigned Reg = MI.getOperand(0).getReg(); - // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting + // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting // loads. It should be checked when processing uses of the load, since // uses can be removed during peephole. if (!MI.getOperand(0).getSubReg() && TargetRegisterInfo::isVirtualRegister(Reg) && - MRI->hasOneNonDBGUse(Reg)) { + MRI->hasOneNonDBGUser(Reg)) { FoldAsLoadDefCandidates.insert(Reg); return true; } @@ -1778,6 +1777,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.erase(MI); LocalMIs.erase(DefMI); LocalMIs.insert(FoldMI); + if (MI->isCall()) + MI->getMF()->updateCallSiteInfo(MI, FoldMI); MI->eraseFromParent(); DefMI->eraseFromParent(); MRI->markUsesInDebugValueAsUndef(FoldedReg); @@ -1826,7 +1827,7 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { assert(Def->isBitcast() && "Invalid definition"); // Bail if there are effects that a plain copy will not expose. - if (Def->hasUnmodeledSideEffects()) + if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects()) return ValueTrackerResult(); // Bitcasts with more than one def are not supported. @@ -1901,13 +1902,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() { // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... // Check if one of the operand defines the subreg we are interested in. for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) { - if (RegSeqInput.SubIdx == DefSubReg) { - if (RegSeqInput.SubReg) - // Bail if we have to compose sub registers. - return ValueTrackerResult(); - + if (RegSeqInput.SubIdx == DefSubReg) return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg); - } } // If the subreg we are tracking is super-defined by another subreg, diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index f9d4a9746e41..0a3838617bc5 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -1,9 +1,8 @@ //===----- PostRAHazardRecognizer.cpp - hazard recognizer -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index dd0a5fe1b39d..5bea9f2893c9 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -1,9 +1,8 @@ //===----- SchedulePostRAList.cpp - list scheduler ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp index b0e9ac03612d..2752e186875c 100644 --- a/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -1,9 +1,8 @@ //===- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -45,7 +44,7 @@ static bool lowerLoadRelative(Function &F) { Value *OffsetPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1)); Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy); - Value *OffsetI32 = B.CreateAlignedLoad(OffsetPtrI32, 4); + Value *OffsetI32 = B.CreateAlignedLoad(Int32Ty, OffsetPtrI32, 4); Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32); @@ -65,9 +64,9 @@ static bool lowerObjCCall(Function &F, const char *NewFn, // If we haven't already looked up this function, check to see if the // program already contains a function with this name. Module *M = F.getParent(); - Constant* FCache = M->getOrInsertFunction(NewFn, F.getFunctionType()); + FunctionCallee FCache = M->getOrInsertFunction(NewFn, F.getFunctionType()); - if (Function* Fn = dyn_cast<Function>(FCache)) { + if (Function *Fn = dyn_cast<Function>(FCache.getCallee())) { Fn->setLinkage(F.getLinkage()); if (setNonLazyBind && !Fn->isWeakForLinker()) { // If we have Native ARC, set nonlazybind attribute for these APIs for diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 7e9b4af12ee9..b38987ad1c90 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -1,9 +1,8 @@ //===---------------------- ProcessImplicitDefs.cpp -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 23754e487a18..d463bee67595 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -1,9 +1,8 @@ //===- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -32,6 +31,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" @@ -169,6 +169,46 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { /// StackObjSet - A set of stack object indexes using StackObjSet = SmallSetVector<int, 8>; +using SavedDbgValuesMap = + SmallDenseMap<MachineBasicBlock *, SmallVector<MachineInstr *, 4>, 4>; + +/// Stash DBG_VALUEs that describe parameters and which are placed at the start +/// of the block. Later on, after the prologue code has been emitted, the +/// stashed DBG_VALUEs will be reinserted at the start of the block. +static void stashEntryDbgValues(MachineBasicBlock &MBB, + SavedDbgValuesMap &EntryDbgValues) { + SmallVector<const MachineInstr *, 4> FrameIndexValues; + + for (auto &MI : MBB) { + if (!MI.isDebugInstr()) + break; + if (!MI.isDebugValue() || !MI.getDebugVariable()->isParameter()) + continue; + if (MI.getOperand(0).isFI()) { + // We can only emit valid locations for frame indices after the frame + // setup, so do not stash them away. + FrameIndexValues.push_back(&MI); + continue; + } + const DILocalVariable *Var = MI.getDebugVariable(); + const DIExpression *Expr = MI.getDebugExpression(); + auto Overlaps = [Var, Expr](const MachineInstr *DV) { + return Var == DV->getDebugVariable() && + Expr->fragmentsOverlap(DV->getDebugExpression()); + }; + // See if the debug value overlaps with any preceding debug value that will + // not be stashed. If that is the case, then we can't stash this value, as + // we would then reorder the values at reinsertion. + if (llvm::none_of(FrameIndexValues, Overlaps)) + EntryDbgValues[&MBB].push_back(&MI); + } + + // Remove stashed debug values from the block. + if (EntryDbgValues.count(&MBB)) + for (auto *MI : EntryDbgValues[&MBB]) + MI->removeFromParent(); +} + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. bool PEI::runOnMachineFunction(MachineFunction &MF) { @@ -179,8 +219,6 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { RS = TRI->requiresRegisterScavenging(MF) ?
new RegScavenger() : nullptr; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF); - FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) || - TRI->requiresFrameIndexReplacementScavenging(MF); ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); // Calculate the MaxCallFrameSize and AdjustsStack variables for the @@ -192,6 +230,11 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { // place all spills in the entry block, all restores in return blocks. calculateSaveRestoreBlocks(MF); + // Stash away DBG_VALUEs that should not be moved by insertion of prolog code. + SavedDbgValuesMap EntryDbgValues; + for (MachineBasicBlock *SaveBlock : SaveBlocks) + stashEntryDbgValues(*SaveBlock, EntryDbgValues); + // Handle CSR spilling and restoring, for targets that need it. if (MF.getTarget().usesPhysRegsForPEI()) spillCalleeSavedRegs(MF); @@ -211,6 +254,10 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { if (!F.hasFnAttribute(Attribute::Naked)) insertPrologEpilogCode(MF); + // Reinsert stashed debug values at the start of the entry blocks. + for (auto &I : EntryDbgValues) + I.first->insert(I.first->begin(), I.second.begin(), I.second.end()); + // Replace all MO_FrameIndex operands with physical register references // and actual offsets. // @@ -495,9 +542,16 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, for (const CalleeSavedInfo &CS : CSI) { // Insert the spill to the stack frame. unsigned Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, - TRI); + + if (CS.isSpilledToReg()) { + BuildMI(SaveBlock, I, DebugLoc(), + TII.get(TargetOpcode::COPY), CS.getDstReg()) + .addReg(Reg, getKillRegState(true)); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, + TRI); + } } } } @@ -517,12 +571,17 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { for (const CalleeSavedInfo &CI : reverse(CSI)) { unsigned Reg = CI.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); - assert(I != RestoreBlock.begin() && - "loadRegFromStackSlot didn't insert any code!"); - // Insert in reverse order. loadRegFromStackSlot can insert - // multiple instructions. + if (CI.isSpilledToReg()) { + BuildMI(RestoreBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY), Reg) + .addReg(CI.getDstReg(), getKillRegState(true)); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); + assert(I != RestoreBlock.begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + } } } } @@ -615,10 +674,13 @@ computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown, SmallVector<int, 16> AllocatedFrameSlots; // Add fixed objects. for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) - AllocatedFrameSlots.push_back(i); + // StackSlot scavenging is only implemented for the default stack. + if (MFI.getStackID(i) == TargetStackID::Default) + AllocatedFrameSlots.push_back(i); // Add callee-save objects. 
for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i) - AllocatedFrameSlots.push_back(i); + if (MFI.getStackID(i) == TargetStackID::Default) + AllocatedFrameSlots.push_back(i); for (int i : AllocatedFrameSlots) { // These are converted from int64_t, but they should always fit in int @@ -740,11 +802,23 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // Skew to be applied to alignment. unsigned Skew = TFI.getStackAlignmentSkew(MF); +#ifdef EXPENSIVE_CHECKS + for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) + if (!MFI.isDeadObjectIndex(i) && + MFI.getStackID(i) == TargetStackID::Default) + assert(MFI.getObjectAlignment(i) <= MFI.getMaxAlignment() && + "MaxAlignment is invalid"); +#endif + // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. // Adjust 'Offset' to point to the end of last fixed sized preallocated // object. for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) { + if (MFI.getStackID(i) != + TargetStackID::Default) // Only allocate objects on the default stack. + continue; + int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of @@ -763,6 +837,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // callee saved registers. if (StackGrowsDown) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { + if (MFI.getStackID(i) != + TargetStackID::Default) // Only allocate objects on the default stack. + continue; + // If the stack grows down, we need to add the size to find the lowest // address of the object. Offset += MFI.getObjectSize(i); @@ -777,6 +855,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { } else if (MaxCSFrameIndex >= MinCSFrameIndex) { // Be careful about underflow in comparisons against MinCSFrameIndex. for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) { + if (MFI.getStackID(i) != + TargetStackID::Default) // Only allocate objects on the default stack. + continue; + if (MFI.isDeadObjectIndex(i)) continue; @@ -845,18 +927,26 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // Make sure that the stack protector comes before the local variables on the // stack. SmallSet<int, 16> ProtectedObjs; - if (MFI.getStackProtectorIndex() >= 0) { + if (MFI.hasStackProtectorIndex()) { + int StackProtectorFI = MFI.getStackProtectorIndex(); StackObjSet LargeArrayObjs; StackObjSet SmallArrayObjs; StackObjSet AddrOfObjs; - AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), StackGrowsDown, - Offset, MaxAlign, Skew); + // If we need a stack protector, we need to make sure that + // LocalStackSlotPass didn't already allocate a slot for it. + // If we are told to use the LocalStackAllocationBlock, the stack protector + // is expected to be already pre-allocated. + if (!MFI.getUseLocalStackAllocationBlock()) + AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign, + Skew); + else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex())) + llvm_unreachable( + "Stack protector not pre-allocated by LocalStackSlotPass."); // Assign large stack objects first.
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { - if (MFI.isObjectPreAllocated(i) && - MFI.getUseLocalStackAllocationBlock()) + if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; @@ -864,8 +954,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { continue; if (MFI.isDeadObjectIndex(i)) continue; - if (MFI.getStackProtectorIndex() == (int)i || - EHRegNodeFrameIndex == (int)i) + if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i) + continue; + if (MFI.getStackID(i) != + TargetStackID::Default) // Only allocate objects on the default stack. continue; switch (MFI.getObjectSSPLayout(i)) { @@ -884,6 +976,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { llvm_unreachable("Unexpected SSPLayoutKind."); } + // We expect **all** the protected stack objects to be pre-allocated by + // LocalStackSlotPass. If it turns out that PEI still has to allocate some + // of them, we may end up messing up the expected order of the objects. + if (MFI.getUseLocalStackAllocationBlock() && + !(LargeArrayObjs.empty() && SmallArrayObjs.empty() && + AddrOfObjs.empty())) + llvm_unreachable("Found protected stack objects not pre-allocated by " + "LocalStackSlotPass."); + AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, Offset, MaxAlign, Skew); AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, @@ -905,11 +1006,13 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { continue; if (MFI.isDeadObjectIndex(i)) continue; - if (MFI.getStackProtectorIndex() == (int)i || - EHRegNodeFrameIndex == (int)i) + if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i) continue; if (ProtectedObjs.count(i)) continue; + if (MFI.getStackID(i) != + TargetStackID::Default) // Only allocate objects on the default stack. + continue; // Add the objects that we need to allocate to our working set. ObjectsToAllocate.push_back(i); @@ -1026,8 +1129,16 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) { /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. void PEI::replaceFrameIndices(MachineFunction &MF) { - const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); - if (!TFI.needsFrameIndexResolution(MF)) return; + const auto &ST = MF.getSubtarget(); + const TargetFrameLowering &TFI = *ST.getFrameLowering(); + if (!TFI.needsFrameIndexResolution(MF)) + return; + + const TargetRegisterInfo *TRI = ST.getRegisterInfo(); + + // Allow the target to determine this after knowing the frame size. + FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) || + TRI->requiresFrameIndexReplacementScavenging(MF); // Store SPAdj at exit of a basic block. 
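The stack-protector handling above feeds the SSPLayout bucketing: objects most likely to be overflowed are placed closest to the guard. A toy version of that ordering, with a plain enum assumed in place of MachineFrameInfo's SSP layout kinds:

#include <vector>

enum class SSPKind { None, LargeArray, SmallArray, AddrOf }; // assumed stand-in

// Emit object indices in protection order: large arrays first, then small
// arrays, then address-taken objects, then everything unprotected.
std::vector<int> protectionOrder(const std::vector<SSPKind> &Kinds) {
  std::vector<int> Large, Small, Addr, Rest;
  for (int i = 0, e = (int)Kinds.size(); i != e; ++i) {
    switch (Kinds[i]) {
    case SSPKind::LargeArray: Large.push_back(i); break;
    case SSPKind::SmallArray: Small.push_back(i); break;
    case SSPKind::AddrOf:     Addr.push_back(i);  break;
    case SSPKind::None:       Rest.push_back(i);  break;
    }
  }
  std::vector<int> Order(Large);
  Order.insert(Order.end(), Small.begin(), Small.end());
  Order.insert(Order.end(), Addr.begin(), Addr.end());
  Order.insert(Order.end(), Rest.begin(), Rest.end());
  return Order;
}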
SmallVector<int, 8> SPState;
@@ -1095,12 +1206,37 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
       assert(i == 0 && "Frame indices can only appear as the first "
                        "operand of a DBG_VALUE machine instruction");
       unsigned Reg;
+      unsigned FrameIdx = MI.getOperand(0).getIndex();
+      unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx);
+
       int64_t Offset =
-          TFI->getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
+          TFI->getFrameIndexReference(MF, FrameIdx, Reg);
       MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
       MI.getOperand(0).setIsDebug();
-      auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
-                                           DIExpression::NoDeref, Offset);
+
+      const DIExpression *DIExpr = MI.getDebugExpression();
+
+      // If we have a direct DBG_VALUE, and its location expression isn't
+      // currently complex, then adding an offset will morph it into a
+      // complex location that is interpreted as being a memory address.
+      // This changes a pointer-valued variable to dereference that pointer,
+      // which is incorrect. Fix by adding DW_OP_stack_value.
+      unsigned PrependFlags = DIExpression::ApplyOffset;
+      if (!MI.isIndirectDebugValue() && !DIExpr->isComplex())
+        PrependFlags |= DIExpression::StackValue;
+
+      // If we have a DBG_VALUE that is indirect and has an implicit location
+      // expression, we need to insert a deref before prepending a memory
+      // location expression. Also, after doing this, we change the DBG_VALUE
+      // to be direct.
+      if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) {
+        SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
+        bool WithStackValue = true;
+        DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
+        // Make the DBG_VALUE direct.
+        MI.getOperand(1).ChangeToRegister(0, false);
+      }
+      DIExpr = DIExpression::prepend(DIExpr, PrependFlags, Offset);
       MI.getOperand(3).setMetadata(DIExpr);
       continue;
     }
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 6ca8d86e3f8e..da3ef4b771f3 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/ReachingDefAnalysis.cpp b/lib/CodeGen/ReachingDefAnalysis.cpp
index a9f0a9387297..f05c97ad621e 100644
--- a/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -1,9 +1,8 @@
 //===---- ReachingDefAnalysis.cpp - Reaching Def Analysis ---*- C++ -*-----===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
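The DBG_VALUE rewrite above makes two independent decisions, and the flag half of it fits in a few lines. A sketch with stand-in flag values (the real bits live on DIExpression):

// Stand-ins for DIExpression::ApplyOffset / DIExpression::StackValue.
enum PrependFlagModel : unsigned { ApplyOffsetF = 1u << 0, StackValueF = 1u << 1 };

unsigned prependFlagsFor(bool IsIndirectDbgValue, bool ExprIsComplex) {
  unsigned Flags = ApplyOffsetF;
  // A direct DBG_VALUE with a simple expression would be reinterpreted as
  // a memory location once the offset makes the expression complex, so it
  // is marked as a computed (stack) value instead.
  if (!IsIndirectDbgValue && !ExprIsComplex)
    Flags |= StackValueF;
  return Flags;
}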
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index bc28a054c680..1cbe75c27d13 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -1,9 +1,8 @@ //===- RegAllocBase.cpp - Register Allocator Base Class -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -20,6 +19,7 @@ #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -119,16 +119,19 @@ void RegAllocBase::allocatePhysRegs() { for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end(); I != E; ) { - MachineInstr *TmpMI = &*(I++); - if (TmpMI->isInlineAsm()) { - MI = TmpMI; + MI = &*(I++); + if (MI->isInlineAsm()) break; - } } - if (MI) + if (MI && MI->isInlineAsm()) { MI->emitError("inline assembly requires more registers than available"); - else + } else if (MI) { + LLVMContext &Context = + MI->getParent()->getParent()->getMMI().getModule()->getContext(); + Context.emitError("ran out of registers during register allocation"); + } else { report_fatal_error("ran out of registers during register allocation"); + } // Keep going after reporting the error. VRM->assignVirt2Phys(VirtReg->reg, RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 686ffc36e049..6a7cc5ba4308 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -1,9 +1,8 @@ //===- RegAllocBase.h - basic regalloc interface and driver -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index daeff3fc3963..46f6946f7003 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -1,9 +1,8 @@ //===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index eb3a4e481f5d..2ffa5e389f89 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -1,9 +1,8 @@ //===- RegAllocFast.cpp - A fast register allocator for debug code --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -102,6 +101,10 @@ namespace { DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap; + /// Has a bit set for every virtual register for which it was determined + /// that it is alive across blocks. + BitVector MayLiveAcrossBlocks; + /// State of a physical register. enum RegState { /// A disabled register is not available for allocation, but an alias may @@ -152,6 +155,7 @@ namespace { enum : unsigned { spillClean = 50, spillDirty = 100, + spillPrefBonus = 20, spillImpossible = ~0u }; @@ -204,19 +208,26 @@ namespace { } void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint); + void allocVirtRegUndef(MachineOperand &MO); MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); - void spillAll(MachineBasicBlock::iterator MI); + void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut); bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); + unsigned traceCopies(unsigned VirtReg) const; + unsigned traceCopyChain(unsigned Reg) const; + int getStackSpaceFor(unsigned VirtReg); void spill(MachineBasicBlock::iterator Before, unsigned VirtReg, MCPhysReg AssignedReg, bool Kill); void reload(MachineBasicBlock::iterator Before, unsigned VirtReg, MCPhysReg PhysReg); + bool mayLiveOut(unsigned VirtReg); + bool mayLiveIn(unsigned VirtReg); + void dumpState(); }; @@ -251,6 +262,53 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) { return FrameIdx; } +/// Returns false if \p VirtReg is known to not live out of the current block. +bool RegAllocFast::mayLiveOut(unsigned VirtReg) { + if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) { + // Cannot be live-out if there are no successors. + return !MBB->succ_empty(); + } + + // If this block loops back to itself, it would be necessary to check whether + // the use comes after the def. + if (MBB->isSuccessor(MBB)) { + MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + return true; + } + + // See if the first \p Limit uses of the register are all in the current + // block. + static const unsigned Limit = 8; + unsigned C = 0; + for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) { + if (UseInst.getParent() != MBB || ++C >= Limit) { + MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + // Cannot be live-out if there are no successors. + return !MBB->succ_empty(); + } + } + + return false; +} + +/// Returns false if \p VirtReg is known to not be live into the current block. 
+bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
+  if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg)))
+    return !MBB->pred_empty();
+
+  // See if the first \p Limit defs of the register are all in the current block.
+  static const unsigned Limit = 8;
+  unsigned C = 0;
+  for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) {
+    if (DefInst.getParent() != MBB || ++C >= Limit) {
+      MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+      return !MBB->pred_empty();
+    }
+  }
+
+  return false;
+}
+
 /// Insert spill instruction for \p AssignedReg before \p Before. Update
 /// DBG_VALUEs with \p VirtReg operands with the stack slot.
 void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
@@ -374,7 +432,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
 }
 
 /// Spill all dirty virtregs without killing them.
-void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
+void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) {
   if (LiveVirtRegs.empty())
     return;
   // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
@@ -382,6 +440,8 @@ void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
   for (LiveReg &LR : LiveVirtRegs) {
     if (!LR.PhysReg)
       continue;
+    if (OnlyLiveOut && !mayLiveOut(LR.VirtReg))
+      continue;
     spillVirtReg(MI, LR);
   }
   LiveVirtRegs.clear();
@@ -558,8 +618,48 @@ void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
   setPhysRegState(PhysReg, VirtReg);
 }
 
+static bool isCoalescable(const MachineInstr &MI) {
+  return MI.isFullCopy();
+}
+
+unsigned RegAllocFast::traceCopyChain(unsigned Reg) const {
+  static const unsigned ChainLengthLimit = 3;
+  unsigned C = 0;
+  do {
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      return Reg;
+    assert(TargetRegisterInfo::isVirtualRegister(Reg));
+
+    MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg);
+    if (!VRegDef || !isCoalescable(*VRegDef))
+      return 0;
+    Reg = VRegDef->getOperand(1).getReg();
+  } while (++C <= ChainLengthLimit);
+  return 0;
+}
+
+/// Check if any of \p VirtReg's definitions is a copy. If it is, follow the
+/// chain of copies to check whether we reach a physical register we can
+/// coalesce with.
+unsigned RegAllocFast::traceCopies(unsigned VirtReg) const {
+  static const unsigned DefLimit = 3;
+  unsigned C = 0;
+  for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) {
+    if (isCoalescable(MI)) {
+      unsigned Reg = MI.getOperand(1).getReg();
+      Reg = traceCopyChain(Reg);
+      if (Reg != 0)
+        return Reg;
+    }
+
+    if (++C >= DefLimit)
+      break;
+  }
+  return 0;
+}
+
 /// Allocates a physical register for VirtReg.
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
   const unsigned VirtReg = LR.VirtReg;
 
   assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
@@ -567,32 +667,54 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
   const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
 
   LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg)
-                    << " in class " << TRI->getRegClassName(&RC) << '\n');
+                    << " in class " << TRI->getRegClassName(&RC)
+                    << " with hint " << printReg(Hint0, TRI) << '\n');
 
   // Take hint when possible.
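mayLiveOut and mayLiveIn above trade precision for speed: they scan at most eight uses (or defs) and give up conservatively as soon as one falls outside the current block. A self-contained model of that probe:

#include <vector>

struct UseSiteModel { int Block; };

// Bounded liveness probe in the spirit of mayLiveOut: any use outside the
// current block, or hitting the scan limit, forces the conservative answer.
bool mayLiveOutModel(const std::vector<UseSiteModel> &Uses, int CurBlock,
                     bool BlockHasSuccessors) {
  static const unsigned Limit = 8;
  unsigned C = 0;
  for (const UseSiteModel &U : Uses)
    if (U.Block != CurBlock || ++C >= Limit)
      return BlockHasSuccessors; // the real pass also caches this answer
  return false; // every scanned use is local: provably not live-out
}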
- if (TargetRegisterInfo::isPhysicalRegister(Hint) && - MRI->isAllocatable(Hint) && RC.contains(Hint)) { + if (TargetRegisterInfo::isPhysicalRegister(Hint0) && + MRI->isAllocatable(Hint0) && RC.contains(Hint0)) { // Ignore the hint if we would have to spill a dirty register. - unsigned Cost = calcSpillCost(Hint); + unsigned Cost = calcSpillCost(Hint0); if (Cost < spillDirty) { + LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI) + << '\n'); if (Cost) - definePhysReg(MI, Hint, regFree); - assignVirtToPhysReg(LR, Hint); + definePhysReg(MI, Hint0, regFree); + assignVirtToPhysReg(LR, Hint0); return; + } else { + LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI) + << "occupied\n"); } + } else { + Hint0 = 0; } - // First try to find a completely free register. - ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC); - for (MCPhysReg PhysReg : AllocationOrder) { - if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) { - assignVirtToPhysReg(LR, PhysReg); + // Try other hint. + unsigned Hint1 = traceCopies(VirtReg); + if (TargetRegisterInfo::isPhysicalRegister(Hint1) && + MRI->isAllocatable(Hint1) && RC.contains(Hint1) && + !isRegUsedInInstr(Hint1)) { + // Ignore the hint if we would have to spill a dirty register. + unsigned Cost = calcSpillCost(Hint1); + if (Cost < spillDirty) { + LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI) + << '\n'); + if (Cost) + definePhysReg(MI, Hint1, regFree); + assignVirtToPhysReg(LR, Hint1); return; + } else { + LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI) + << "occupied\n"); } + } else { + Hint1 = 0; } MCPhysReg BestReg = 0; unsigned BestCost = spillImpossible; + ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC); for (MCPhysReg PhysReg : AllocationOrder) { LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' '); unsigned Cost = calcSpillCost(PhysReg); @@ -602,6 +724,10 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) { assignVirtToPhysReg(LR, PhysReg); return; } + + if (PhysReg == Hint1 || PhysReg == Hint0) + Cost -= spillPrefBonus; + if (Cost < BestCost) { BestReg = PhysReg; BestCost = Cost; @@ -624,6 +750,31 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) { assignVirtToPhysReg(LR, BestReg); } +void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { + assert(MO.isUndef() && "expected undef use"); + unsigned VirtReg = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Expected virtreg"); + + LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); + MCPhysReg PhysReg; + if (LRI != LiveVirtRegs.end() && LRI->PhysReg) { + PhysReg = LRI->PhysReg; + } else { + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC); + assert(!AllocationOrder.empty() && "Allocation order must not be empty"); + PhysReg = AllocationOrder[0]; + } + + unsigned SubRegIdx = MO.getSubReg(); + if (SubRegIdx != 0) { + PhysReg = TRI->getSubReg(PhysReg, SubRegIdx); + MO.setSubReg(0); + } + MO.setReg(PhysReg); + MO.setIsRenamable(true); +} + /// Allocates a register for VirtReg and mark it as dirty. MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, unsigned Hint) { @@ -941,12 +1092,23 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // Second scan. // Allocate virtreg uses. 
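traceCopyChain above walks at most three full copies from a definition toward a physical register and uses the result as a second allocation hint (Hint1), worth spillPrefBonus in the cost comparison. A toy walk over a def map, where registers without an assumed virtual-register tag bit count as physical:

#include <map>

// Toy copy-chain walk: Def maps a virtual register to the source register
// of its unique defining full copy, when such a copy exists.
unsigned traceCopyChainModel(unsigned Reg,
                             const std::map<unsigned, unsigned> &Def) {
  static const unsigned VRegTag = 1u << 31; // assumed virtual-register tag bit
  static const unsigned ChainLengthLimit = 3;
  for (unsigned C = 0; C <= ChainLengthLimit; ++C) {
    if (!(Reg & VRegTag))
      return Reg; // reached a physical register: usable as a hint
    auto It = Def.find(Reg);
    if (It == Def.end())
      return 0;   // no unique coalescable def: no hint
    Reg = It->second;
  }
  return 0;       // chain too long: give up
}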
+  bool HasUndefUse = false;
   for (unsigned I = 0; I != VirtOpEnd; ++I) {
     MachineOperand &MO = MI.getOperand(I);
     if (!MO.isReg()) continue;
     unsigned Reg = MO.getReg();
     if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
     if (MO.isUse()) {
+      if (MO.isUndef()) {
+        HasUndefUse = true;
+        // There is no need to allocate a register for an undef use.
+        continue;
+      }
+
+      // Populate MayLiveAcrossBlocks in case the use block is allocated before
+      // the def block (removing the vreg uses).
+      mayLiveIn(Reg);
+
       LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
       MCPhysReg PhysReg = LR.PhysReg;
       CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
@@ -955,6 +1117,22 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
     }
   }
 
+  // Allocate undef operands. This is a separate step because in a situation
+  // like ` = OP undef %X, %X` both operands need the same register assigned,
+  // so we should perform the normal assignment first.
+  if (HasUndefUse) {
+    for (MachineOperand &MO : MI.uses()) {
+      if (!MO.isReg() || !MO.isUse())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg))
+        continue;
+
+      assert(MO.isUndef() && "Should only have undef virtreg uses left");
+      allocVirtRegUndef(MO);
+    }
+  }
+
   // Track registers defined by instruction - early clobbers and tied uses at
   // this point.
   UsedInInstr.clear();
@@ -979,10 +1157,24 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
     // definitions may be used later on and we do not want to reuse
     // those for virtual registers in between.
     LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
-    spillAll(MI);
+    spillAll(MI, /*OnlyLiveOut*/ false);
   }
 
   // Third scan.
+  // Mark all physreg defs as used before allocating virtreg defs.
+  for (unsigned I = 0; I != DefOpEnd; ++I) {
+    const MachineOperand &MO = MI.getOperand(I);
+    if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+      continue;
+    unsigned Reg = MO.getReg();
+
+    if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) ||
+        !MRI->isAllocatable(Reg))
+      continue;
+    definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+  }
+
+  // Fourth scan.
   // Allocate defs and collect dead defs.
   for (unsigned I = 0; I != DefOpEnd; ++I) {
     const MachineOperand &MO = MI.getOperand(I);
@@ -990,11 +1182,9 @@
       continue;
     unsigned Reg = MO.getReg();
 
-    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
-      if (!MRI->isAllocatable(Reg)) continue;
-      definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+    // We have already dealt with phys regs in the previous scan.
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
       continue;
-    }
 
     MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
     if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
       VirtDead.push_back(Reg);
@@ -1089,7 +1279,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
 
   // Spill all physical registers holding virtual registers now.
   LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n");
-  spillAll(MBB.getFirstTerminator());
+  spillAll(MBB.getFirstTerminator(), /*OnlyLiveOut*/ true);
 
   // Erase all the coalesced copies. We are delaying it until now because
   // LiveVirtRegs might refer to the instrs.
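Deferring undef uses to a second pass matters for instructions like `%d = OP undef %x, %x`: the real use of %x picks the register, and the undef operand must then follow that choice. A minimal two-pass model:

#include <map>
#include <vector>

struct OperandModel { unsigned VReg; bool Undef; unsigned Assigned = 0; };

// Pass 1 assigns registers to real uses; pass 2 lets undef uses reuse
// whatever the same vreg already received (or any register otherwise).
void allocateUsesModel(std::vector<OperandModel> &Ops,
                       std::map<unsigned, unsigned> &Loc,
                       unsigned (*allocate)(unsigned VReg)) {
  for (OperandModel &O : Ops)
    if (!O.Undef) {
      if (!Loc.count(O.VReg))
        Loc[O.VReg] = allocate(O.VReg);
      O.Assigned = Loc[O.VReg];
    }
  for (OperandModel &O : Ops)
    if (O.Undef)
      O.Assigned = Loc.count(O.VReg) ? Loc[O.VReg] : allocate(O.VReg);
}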
@@ -1118,6 +1308,8 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { unsigned NumVirtRegs = MRI->getNumVirtRegs(); StackSlotForVirtReg.resize(NumVirtRegs); LiveVirtRegs.setUniverse(NumVirtRegs); + MayLiveAcrossBlocks.clear(); + MayLiveAcrossBlocks.resize(NumVirtRegs); // Loop over all of the basic blocks, eliminating virtual register references for (MachineBasicBlock &MBB : MF) diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 81b21b442437..771fc46415db 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -1,9 +1,8 @@ //===- RegAllocGreedy.cpp - greedy register allocator ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -138,7 +137,7 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost", cl::init(0), cl::Hidden); static cl::opt<bool> ConsiderLocalIntervalCost( - "condsider-local-interval-cost", cl::Hidden, + "consider-local-interval-cost", cl::Hidden, cl::desc("Consider the cost of local intervals created by a split " "candidate when choosing the best split candidate."), cl::init(false)); @@ -465,7 +464,8 @@ private: void calcGapWeights(unsigned, SmallVectorImpl<float>&); unsigned canReassign(LiveInterval &VirtReg, unsigned PrevReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); - bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); + bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&, + const SmallVirtRegSet&); bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg, SlotIndex Start, SlotIndex End, EvictionCost &MaxCost); @@ -479,9 +479,11 @@ private: const SmallVirtRegSet &FixedRegisters); unsigned tryAssign(LiveInterval&, AllocationOrder&, - SmallVectorImpl<unsigned>&); + SmallVectorImpl<unsigned>&, + const SmallVirtRegSet&); unsigned tryEvict(LiveInterval&, AllocationOrder&, - SmallVectorImpl<unsigned>&, unsigned = ~0u); + SmallVectorImpl<unsigned>&, unsigned, + const SmallVirtRegSet&); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<unsigned>&); unsigned isSplitBenefitWorthCost(LiveInterval &VirtReg); @@ -508,7 +510,8 @@ private: unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<unsigned>&); unsigned trySplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<unsigned>&); + SmallVectorImpl<unsigned>&, + const SmallVirtRegSet&); unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, SmallVectorImpl<unsigned> &, SmallVirtRegSet &, unsigned); @@ -758,7 +761,8 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { /// tryAssign - Try to assign VirtReg to an available register. 
unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<unsigned> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs, + const SmallVirtRegSet &FixedRegisters) { Order.rewind(); unsigned PhysReg; while ((PhysReg = Order.next())) @@ -776,7 +780,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n'); EvictionCost MaxCost; MaxCost.setBrokenHints(1); - if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { + if (canEvictInterference(VirtReg, Hint, true, MaxCost, FixedRegisters)) { evictInterference(VirtReg, Hint, NewVRegs); return Hint; } @@ -794,7 +798,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " << Cost << '\n'); - unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); + unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters); return CheapReg ? CheapReg : PhysReg; } @@ -866,7 +870,8 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, - bool IsHint, EvictionCost &MaxCost) { + bool IsHint, EvictionCost &MaxCost, + const SmallVirtRegSet &FixedRegisters) { // It is only possible to evict virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) return false; @@ -896,6 +901,13 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, LiveInterval *Intf = Q.interferingVRegs()[i - 1]; assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && "Only expecting virtual register interference from query"); + + // Do not allow eviction of a virtual register if we are in the middle + // of last-chance recoloring and this virtual register is one that we + // have scavenged a physical register for. + if (FixedRegisters.count(Intf->reg)) + return false; + // Never evict spill products. They cannot split or spill. if (getStage(*Intf) == RS_Done) return false; @@ -1094,7 +1106,8 @@ bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const { unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<unsigned> &NewVRegs, - unsigned CostPerUseLimit) { + unsigned CostPerUseLimit, + const SmallVirtRegSet &FixedRegisters) { NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); @@ -1142,7 +1155,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, continue; } - if (!canEvictInterference(VirtReg, PhysReg, false, BestCost)) + if (!canEvictInterference(VirtReg, PhysReg, false, BestCost, + FixedRegisters)) continue; // Best so far. @@ -2248,8 +2262,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:"); // Constrain to VirtReg's live range. - unsigned ri = std::lower_bound(RMS.begin(), RMS.end(), - Uses.front().getRegSlot()) - RMS.begin(); + unsigned ri = + llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin(); unsigned re = RMS.size(); for (unsigned i = 0; i != NumGaps && ri != re; ++i) { // Look for Uses[i] <= RMS <= Uses[i+1]. @@ -2444,7 +2458,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// assignable. 
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs. unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<unsigned>&NewVRegs) { + SmallVectorImpl<unsigned>&NewVRegs, + const SmallVirtRegSet &FixedRegisters) { // Ranges must be Split2 or less. if (getStage(VirtReg) >= RS_Spill) return 0; @@ -2472,7 +2487,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, if (SA->didRepairRange()) { // VirtReg has changed, so all cached queries are invalid. Matrix->invalidateVirtRegs(); - if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) return PhysReg; } @@ -2611,6 +2626,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, DenseMap<unsigned, unsigned> VirtRegToPhysReg; // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in // this recoloring "session". + assert(!FixedRegisters.count(VirtReg.reg)); FixedRegisters.insert(VirtReg.reg); SmallVector<unsigned, 4> CurrentNewVRegs; @@ -2858,14 +2874,14 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { if (!Instr.isFullCopy()) continue; // Look for the other end of the copy. - unsigned OtherReg = Instr.getOperand(0).getReg(); + Register OtherReg = Instr.getOperand(0).getReg(); if (OtherReg == Reg) { OtherReg = Instr.getOperand(1).getReg(); if (OtherReg == Reg) continue; } // Get the current assignment. - unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) + Register OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) ? OtherReg : VRM->getPhys(OtherReg); // Push the collected information. @@ -3022,7 +3038,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, unsigned CostPerUseLimit = ~0u; // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); - if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) { // If VirtReg got an assignment, the eviction info is no longre relevant. LastEvicted.clearEvicteeInfo(VirtReg.reg); // When NewVRegs is not empty, we may have made decisions such as evicting @@ -3049,7 +3065,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // get a second chance until they have been split. if (Stage != RS_Split) if (unsigned PhysReg = - tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) { + tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit, + FixedRegisters)) { unsigned Hint = MRI->getSimpleHint(VirtReg.reg); // If VirtReg has a hint and that hint is broken record this // virtual register as a recoloring candidate for broken hint. @@ -3079,7 +3096,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, if (Stage < RS_Spill) { // Try splitting VirtReg or interferences. unsigned NewVRegSizeBefore = NewVRegs.size(); - unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); + unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters); if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) { // If VirtReg got split, the eviction info is no longre relevant. 
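Threading FixedRegisters through tryAssign/tryEvict/trySplit protects one invariant: a virtual register that last-chance recoloring has already pinned must never be evicted again within the same session. The heart of the new canEvictInterference check, as a stand-alone sketch:

#include <set>

// Eviction legality under recoloring: pinned vregs and finished spill
// products are untouchable; everything else remains subject to the usual
// cost comparison (not modeled here).
bool mayEvictModel(unsigned IntfVReg, bool IsSpillProduct,
                   const std::set<unsigned> &FixedRegisters) {
  if (FixedRegisters.count(IntfVReg))
    return false; // pinned by an in-flight recoloring session
  if (IsSpillProduct)
    return false; // RS_Done ranges can neither split nor spill again
  return true;
}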
LastEvicted.clearEvicteeInfo(VirtReg.reg);
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index c19001c8403d..7a5a6c148ed4 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -1,9 +1,8 @@
 //===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp
index 66c7c5cd7dbf..b37dfada7101 100644
--- a/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -1,9 +1,8 @@
 //===-- RegUsageInfoCollector.cpp - Register Usage Information Collector --===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -78,14 +77,48 @@ FunctionPass *llvm::createRegUsageInfoCollector() {
   return new RegUsageInfoCollector();
 }
 
+// TODO: Move to hook somewhere?
+
+// Return true if it is useful to track the used registers for IPRA / no CSR
+// optimizations. This is not useful for entry points, and computing the
+// register usage information is expensive.
+static bool isCallableFunction(const MachineFunction &MF) {
+  switch (MF.getFunction().getCallingConv()) {
+  case CallingConv::AMDGPU_VS:
+  case CallingConv::AMDGPU_GS:
+  case CallingConv::AMDGPU_PS:
+  case CallingConv::AMDGPU_CS:
+  case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_ES:
+  case CallingConv::AMDGPU_LS:
+  case CallingConv::AMDGPU_KERNEL:
+    return false;
+  default:
+    return true;
+  }
+}
+
 bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
   MachineRegisterInfo *MRI = &MF.getRegInfo();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   const LLVMTargetMachine &TM = MF.getTarget();
 
   LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
-                    << " -------------------- \n");
-  LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");
+                    << " -------------------- \nFunction Name : "
+                    << MF.getName() << '\n');
+
+  // Analyzing the register usage may be expensive on some targets.
+  if (!isCallableFunction(MF)) {
+    LLVM_DEBUG(dbgs() << "Not analyzing non-callable function\n");
+    return false;
+  }
+
+  // If there are no callers, there's no point in computing more precise
+  // register usage here.
+  if (MF.getFunction().use_empty()) {
+    LLVM_DEBUG(dbgs() << "Not analyzing function with no callers\n");
+    return false;
+  }
 
   std::vector<uint32_t> RegMask;
 
@@ -111,6 +144,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
   };
   // Scan all the physical registers. When a register is defined in the current
   // function set it and all the aliasing registers as defined in the regmask.
+  // FIXME: Rewrite to use regunits.
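The loop that follows builds the function's register-usage mask. The regmask convention is the usual MachineOperand one: a set bit means the register is preserved, so recording a definition clears the bit for the register and all of its aliases. A compact sketch of that bookkeeping, with an assumed precomputed alias table standing in for MCRegAliasIterator:

#include <cstdint>
#include <vector>

// Clear the "preserved" bit for Reg and everything aliasing it, using the
// packed 32-bits-per-word regmask layout.
void markClobberedWithAliases(std::vector<uint32_t> &RegMask, unsigned Reg,
                              const std::vector<std::vector<unsigned>> &Aliases) {
  auto clobber = [&RegMask](unsigned R) {
    RegMask[R / 32] &= ~(1u << (R % 32));
  };
  clobber(Reg);
  for (unsigned A : Aliases[Reg])
    clobber(A);
}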
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) { // Don't count registers that are saved and restored. if (SavedRegs.test(PReg)) @@ -136,11 +170,14 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { << " function optimized for not having CSR.\n"); } - for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) - if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg)) - LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " "); + LLVM_DEBUG( + for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) { + if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg)) + dbgs() << printReg(PReg, TRI) << " "; + } - LLVM_DEBUG(dbgs() << " \n----------------------------------------\n"); + dbgs() << " \n----------------------------------------\n"; + ); PRUI.storeUpdateRegUsageInfo(F, RegMask); @@ -155,38 +192,17 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) { // Target will return the set of registers that it saves/restores as needed. SavedRegs.clear(); TFI.determineCalleeSaves(MF, SavedRegs); + if (SavedRegs.none()) + return; // Insert subregs. const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { - unsigned Reg = CSRegs[i]; - if (SavedRegs.test(Reg)) - for (MCSubRegIterator SR(Reg, &TRI, false); SR.isValid(); ++SR) + MCPhysReg Reg = CSRegs[i]; + if (SavedRegs.test(Reg)) { + // Save subregisters + for (MCSubRegIterator SR(Reg, &TRI); SR.isValid(); ++SR) SavedRegs.set(*SR); - } - - // Insert any register fully saved via subregisters. - for (const TargetRegisterClass *RC : TRI.regclasses()) { - if (!RC->CoveredBySubRegs) - continue; - - for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) { - if (SavedRegs.test(PReg)) - continue; - - // Check if PReg is fully covered by its subregs. - if (!RC->contains(PReg)) - continue; - - // Add PReg to SavedRegs if all subregs are saved. - bool AllSubRegsSaved = true; - for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR) - if (!SavedRegs.test(*SR)) { - AllSubRegsSaved = false; - break; - } - if (AllSubRegsSaved) - SavedRegs.set(PReg); } } } diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp index 256de295821d..fc4be82d215e 100644 --- a/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -1,9 +1,8 @@ //=--- RegUsageInfoPropagate.cpp - Register Usage Informartion Propagation --=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index add8faec97d4..530e0cccf1d4 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -1,9 +1,8 @@ //===- RegisterClassInfo.cpp - Dynamic Register Class Info ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -91,6 +90,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
 void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
   assert(RC && "no register class given");
   RCInfo &RCI = RegClass[RC->getID()];
+  auto &STI = MF->getSubtarget();
 
   // Raw register count, including all reserved regs.
   unsigned NumRegs = RC->getNumRegs();
@@ -115,7 +115,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
     unsigned Cost = TRI->getCostPerUse(PhysReg);
     MinCost = std::min(MinCost, Cost);
 
-    if (CalleeSavedAliases[PhysReg])
+    if (CalleeSavedAliases[PhysReg] &&
+        !STI.ignoreCSRForAllocationOrder(*MF, PhysReg))
       // PhysReg aliases a CSR, save it for later.
       CSRAlias.push_back(PhysReg);
     else {
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 2a06d5e95fbb..2db6ab454cea 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1,9 +1,8 @@
 //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -105,6 +104,19 @@ static cl::opt<unsigned> LateRematUpdateThreshold(
                  "repeated work. "),
     cl::init(100));
 
+static cl::opt<unsigned> LargeIntervalSizeThreshold(
+    "large-interval-size-threshold", cl::Hidden,
+    cl::desc("If the valnos size of an interval is larger than the threshold, "
+             "it is regarded as a large interval. "),
+    cl::init(100));
+
+static cl::opt<unsigned> LargeIntervalFreqThreshold(
+    "large-interval-freq-threshold", cl::Hidden,
+    cl::desc("For a large interval, if it is coalesced with other live "
+             "intervals many times more than the threshold, stop its "
+             "coalescing to control the compile time. "),
+    cl::init(100));
+
 namespace {
 
 class RegisterCoalescer : public MachineFunctionPass,
@@ -153,6 +165,10 @@ namespace {
     /// lateLiveIntervalUpdate is called.
     DenseSet<unsigned> ToBeUpdated;
 
+    /// Record how many times a large live interval with many valnos
+    /// has been tried to join with other live intervals.
+    DenseMap<unsigned, unsigned long> LargeLIVisitCounter;
+
     /// Recursively eliminate dead defs in DeadDefs.
     void eliminateDeadDefs();
 
@@ -195,6 +211,11 @@ namespace {
     /// Attempt joining two virtual registers. Return true on success.
     bool joinVirtRegs(CoalescerPair &CP);
 
+    /// If a live interval has many valnos and is coalesced with other
+    /// live intervals many times, we regard such a live interval as having
+    /// high compile time cost.
+    bool isHighCostLiveInterval(LiveInterval &LI);
+
     /// Attempt joining with a reserved physreg.
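The two options above configure the throttle implemented later as isHighCostLiveInterval: an interval with at least LargeIntervalSizeThreshold value numbers may be considered for joining at most LargeIntervalFreqThreshold times before the coalescer starts refusing it. The same logic, restated as a small self-contained class:

#include <cstddef>
#include <map>

struct IntervalThrottleModel {
  unsigned SizeThreshold = 100; // mirrors -large-interval-size-threshold
  unsigned FreqThreshold = 100; // mirrors -large-interval-freq-threshold
  std::map<unsigned, unsigned long> VisitCount;

  bool isHighCost(unsigned Reg, size_t NumValNos) {
    if (NumValNos < SizeThreshold)
      return false;
    unsigned long &Counter = VisitCount[Reg];
    if (Counter < FreqThreshold) {
      ++Counter;
      return false; // still under budget: allow this join attempt
    }
    return true;    // joining this interval again is too expensive
  }
};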
bool joinReservedPhysReg(CoalescerPair &CP); @@ -337,9 +358,10 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) -static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, - unsigned &Src, unsigned &Dst, - unsigned &SrcSub, unsigned &DstSub) { +LLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri, + const MachineInstr *MI, unsigned &Src, + unsigned &Dst, unsigned &SrcSub, + unsigned &DstSub) { if (MI->isCopy()) { Dst = MI->getOperand(0).getReg(); DstSub = MI->getOperand(0).getSubReg(); @@ -672,8 +694,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, for (LiveRange::Segment &ASeg : IntA.segments) { if (ASeg.valno != AValNo) continue; - LiveInterval::iterator BI = - std::upper_bound(IntB.begin(), IntB.end(), ASeg.start); + LiveInterval::iterator BI = llvm::upper_bound(IntB, ASeg.start); if (BI != IntB.begin()) --BI; for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) { @@ -903,23 +924,32 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, } SlotIndex AIdx = CopyIdx.getRegSlot(true); LaneBitmask MaskA; + const SlotIndexes &Indexes = *LIS->getSlotIndexes(); for (LiveInterval::SubRange &SA : IntA.subranges()) { VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); - assert(ASubValNo != nullptr); + // Even if we are dealing with a full copy, some lanes can + // still be undefined. + // E.g., + // undef A.subLow = ... + // B = COPY A <== A.subHigh is undefined here and does + // not have a value number. + if (!ASubValNo) + continue; MaskA |= SA.LaneMask; - IntB.refineSubRanges(Allocator, SA.LaneMask, - [&Allocator,&SA,CopyIdx,ASubValNo,&ShrinkB] - (LiveInterval::SubRange &SR) { - VNInfo *BSubValNo = SR.empty() - ? SR.getNextValue(CopyIdx, Allocator) - : SR.getVNInfoAt(CopyIdx); - assert(BSubValNo != nullptr); - auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); - ShrinkB |= P.second; - if (P.first) - BSubValNo->def = ASubValNo->def; - }); + IntB.refineSubRanges( + Allocator, SA.LaneMask, + [&Allocator, &SA, CopyIdx, ASubValNo, + &ShrinkB](LiveInterval::SubRange &SR) { + VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator) + : SR.getVNInfoAt(CopyIdx); + assert(BSubValNo != nullptr); + auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); + ShrinkB |= P.second; + if (P.first) + BSubValNo->def = ASubValNo->def; + }, + Indexes, *TRI); } // Go over all subranges of IntB that have not been covered by IntA, // and delete the segments starting at CopyIdx. This can happen if @@ -947,7 +977,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, /// For copy B = A in BB2, if A is defined by A = B in BB0 which is a /// predecessor of BB2, and if B is not redefined on the way from A = B -/// in BB2 to B = A in BB2, B = A in BB2 is partially redundant if the +/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the /// execution goes through the path from BB0 to BB2. We may move B = A /// to the predecessor without such reversed copy. /// So we will transform the program from: @@ -1494,7 +1524,8 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { // CoalescerPair may have a new register class with adjusted subreg indices // at this point. 
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); + if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + return nullptr; SlotIndex Idx = LIS->getInstructionIndex(*CopyMI); const LiveInterval &SrcLI = LIS->getInterval(SrcReg); @@ -1994,19 +2025,19 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { if (CP.isFlipped()) { // Physreg is copied into vreg // %y = COPY %physreg_x - // ... //< no other def of %x here + // ... //< no other def of %physreg_x here // use %y // => // ... - // use %x + // use %physreg_x CopyMI = MRI->getVRegDef(SrcReg); } else { // VReg is copied into physreg: // %y = def - // ... //< no other def or use of %y here - // %y = COPY %physreg_x + // ... //< no other def or use of %physreg_x here + // %physreg_x = COPY %y // => - // %y = def + // %physreg_x = def // ... if (!MRI->hasOneNonDBGUse(SrcReg)) { LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); @@ -3010,7 +3041,9 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // If a subrange starts at the copy then an undefined value has been // copied and we must remove that subrange value as well. VNInfo *ValueOut = Q.valueOutOrDead(); - if (ValueOut != nullptr && Q.valueIn() == nullptr) { + if (ValueOut != nullptr && (Q.valueIn() == nullptr || + (V.Identical && V.Resolution == CR_Erase && + ValueOut->def == Def))) { LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) << " at " << Def << "\n"); SmallVector<SlotIndex,8> EndPoints; @@ -3019,7 +3052,7 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // Mark value number as unused. ValueOut->markUnused(); - if (V.Identical && S.Query(OtherDef).valueOut()) { + if (V.Identical && S.Query(OtherDef).valueOutOrDead()) { // If V is identical to V.OtherVNI (and S was live at OtherDef), // then we can't simply prune V from S. V needs to be replaced // with V.OtherVNI. @@ -3241,16 +3274,29 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, LaneBitmask LaneMask, CoalescerPair &CP) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - LI.refineSubRanges(Allocator, LaneMask, - [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) { - if (SR.empty()) { - SR.assign(ToMerge, Allocator); - } else { - // joinSubRegRange() destroys the merged range, so we need a copy. - LiveRange RangeCopy(ToMerge, Allocator); - joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); - } - }); + LI.refineSubRanges( + Allocator, LaneMask, + [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) { + if (SR.empty()) { + SR.assign(ToMerge, Allocator); + } else { + // joinSubRegRange() destroys the merged range, so we need a copy. + LiveRange RangeCopy(ToMerge, Allocator); + joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); + } + }, + *LIS->getSlotIndexes(), *TRI); +} + +bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) { + if (LI.valnos.size() < LargeIntervalSizeThreshold) + return false; + auto &Counter = LargeLIVisitCounter[LI.reg]; + if (Counter < LargeIntervalFreqThreshold) { + Counter++; + return false; + } + return true; } bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { @@ -3265,6 +3311,9 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n'); + if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS)) + return false; + // First compute NewVNInfo and the simple value mappings. 
// Detect impossible conflicts early. if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) @@ -3474,7 +3523,8 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { if (!UseTerminalRule) return false; unsigned DstReg, DstSubReg, SrcReg, SrcSubReg; - isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg); + if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg)) + return false; // Check if the destination of this copy has any other affinity. if (TargetRegisterInfo::isPhysicalRegister(DstReg) || // If SrcReg is a physical register, the copy won't be coalesced. @@ -3498,8 +3548,9 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB) continue; unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg; - isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg, - OtherSubReg); + if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg, + OtherSubReg)) + return false; if (OtherReg == SrcReg) OtherReg = OtherSrcReg; // Check if OtherReg is a non-terminal. @@ -3620,6 +3671,7 @@ void RegisterCoalescer::releaseMemory() { WorkList.clear(); DeadDefs.clear(); InflateRegs.clear(); + LargeLIVisitCounter.clear(); } bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index 1a46f6d053e6..f505d46cd338 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -1,9 +1,8 @@ //===- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 1099e468e885..7d9b3aa9b2d7 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -1,9 +1,8 @@ //===- RegisterPressure.cpp - Dynamic Register Pressure -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -846,7 +845,7 @@ void RegPressureTracker::recedeSkipDebugValues() { CurrPos = skipDebugInstructionsBackward(std::prev(CurrPos), MBB->begin()); SlotIndex SlotIdx; - if (RequireIntervals) + if (RequireIntervals && !CurrPos->isDebugInstr()) SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot(); // Open the top of the region using slot indexes. @@ -856,6 +855,12 @@ void RegPressureTracker::recedeSkipDebugValues() { void RegPressureTracker::recede(SmallVectorImpl<RegisterMaskPair> *LiveUses) { recedeSkipDebugValues(); + if (CurrPos->isDebugValue()) { + // It's possible to only have debug_value instructions and hit the start of + // the block. 
+ assert(CurrPos == MBB->begin()); + return; + } const MachineInstr &MI = *CurrPos; RegisterOperands RegOpers; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 3660586c1358..bb19110e6d70 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -1,9 +1,8 @@ //===- RegisterScavenging.cpp - Machine register scavenging ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -534,7 +533,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, - int SPAdj) { + int SPAdj, bool AllowSpill) { MachineInstr &MI = *I; const MachineFunction &MF = *MI.getMF(); // Consider all allocatable registers in the register class initially @@ -565,6 +564,9 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, return SReg; } + if (!AllowSpill) + return 0; + ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI); Scavenged.Restore = &*std::prev(UseMI); @@ -576,7 +578,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, - bool RestoreAfter, int SPAdj) { + bool RestoreAfter, int SPAdj, + bool AllowSpill) { const MachineBasicBlock &MBB = *To->getParent(); const MachineFunction &MF = *MBB.getParent(); @@ -590,21 +593,25 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator SpillBefore = P.second; assert(Reg != 0 && "No register left to scavenge!"); // Found an available register? - if (SpillBefore != MBB.end()) { - MachineBasicBlock::iterator ReloadAfter = - RestoreAfter ? std::next(MBBI) : MBBI; - MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter); - if (ReloadBefore != MBB.end()) - LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n'); - ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore); - Scavenged.Restore = &*std::prev(SpillBefore); - LiveUnits.removeReg(Reg); - LLVM_DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI) - << " until " << *SpillBefore); - } else { + if (SpillBefore == MBB.end()) { LLVM_DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI) - << '\n'); + << '\n'); + return Reg; } + + if (!AllowSpill) + return 0; + + MachineBasicBlock::iterator ReloadAfter = + RestoreAfter ? 
std::next(MBBI) : MBBI; + MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter); + if (ReloadBefore != MBB.end()) + LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n'); + ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore); + Scavenged.Restore = &*std::prev(SpillBefore); + LiveUnits.removeReg(Reg); + LLVM_DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI) + << " until " << *SpillBefore); return Reg; } diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp index 6b9880a8913f..6858d7233bc5 100644 --- a/lib/CodeGen/RegisterUsageInfo.cpp +++ b/lib/CodeGen/RegisterUsageInfo.cpp @@ -1,9 +1,8 @@ //===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp index 156d1c81c238..22cff48c3051 100644 --- a/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/lib/CodeGen/RenameIndependentSubregs.cpp @@ -1,9 +1,8 @@ //===-- RenameIndependentSubregs.cpp - Live Interval Analysis -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp index a02302e6ff99..019de6554d2a 100644 --- a/lib/CodeGen/ResetMachineFunctionPass.cpp +++ b/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -1,9 +1,8 @@ //===-- ResetMachineFunctionPass.cpp - Reset Machine Function ----*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -27,6 +26,7 @@ using namespace llvm; #define DEBUG_TYPE "reset-machine-function" STATISTIC(NumFunctionsReset, "Number of functions reset"); +STATISTIC(NumFunctionsVisited, "Number of functions visited"); namespace { class ResetMachineFunction : public MachineFunctionPass { @@ -51,6 +51,7 @@ namespace { } bool runOnMachineFunction(MachineFunction &MF) override { + ++NumFunctionsVisited; // No matter what happened, whether we successfully selected the function // or not, nothing is going to use the vreg types after us. Make sure they // disappear. 
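The new AllowSpill parameter turns both scavenging entry points into two-phase helpers: a caller can first probe for a free register and only pay for an emergency spill when nothing is available. A hypothetical caller-side wrapper (not an LLVM API) showing the intended pattern:

#include <functional>

// Scavenge(false) must return 0 instead of spilling; Scavenge(true) may
// insert an emergency spill/reload pair around the use.
unsigned scavengeWithFallback(
    const std::function<unsigned(bool /*AllowSpill*/)> &Scavenge) {
  if (unsigned Reg = Scavenge(/*AllowSpill=*/false))
    return Reg; // found a free register, no spill code emitted
  return Scavenge(/*AllowSpill=*/true);
}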
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp index c356fb57ac6d..a6bc7330e2cc 100644 --- a/lib/CodeGen/SafeStack.cpp +++ b/lib/CodeGen/SafeStack.cpp @@ -1,9 +1,8 @@ //===- SafeStack.cpp - Safe Stack Insertion -------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -372,7 +371,7 @@ Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) { if (!StackGuardVar) StackGuardVar = F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy); - return IRB.CreateLoad(StackGuardVar, "StackGuard"); + return IRB.CreateLoad(StackPtrTy, StackGuardVar, "StackGuard"); } void SafeStack::findInsts(Function &F, @@ -453,7 +452,8 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F, ++NumUnsafeStackRestorePoints; IRB.SetInsertPoint(I->getNextNode()); - Value *CurrentTop = DynamicTop ? IRB.CreateLoad(DynamicTop) : StaticTop; + Value *CurrentTop = + DynamicTop ? IRB.CreateLoad(StackPtrTy, DynamicTop) : StaticTop; IRB.CreateStore(CurrentTop, UnsafeStackPtr); } @@ -462,7 +462,7 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F, void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI, AllocaInst *StackGuardSlot, Value *StackGuard) { - Value *V = IRB.CreateLoad(StackGuardSlot); + Value *V = IRB.CreateLoad(StackPtrTy, StackGuardSlot); Value *Cmp = IRB.CreateICmpNE(StackGuard, V); auto SuccessProb = BranchProbabilityInfo::getBranchProbStackProtector(true); @@ -475,8 +475,8 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI, /* Unreachable */ true, Weights); IRBuilder<> IRBFail(CheckTerm); // FIXME: respect -fsanitize-trap / -ftrap-function here? - Constant *StackChkFail = F.getParent()->getOrInsertFunction( - "__stack_chk_fail", IRB.getVoidTy()); + FunctionCallee StackChkFail = + F.getParent()->getOrInsertFunction("__stack_chk_fail", IRB.getVoidTy()); IRBFail.CreateCall(StackChkFail, {}); } @@ -550,7 +550,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( if (StackGuardSlot) { unsigned Offset = SSL.getObjectOffset(StackGuardSlot); - Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8* + Value *Off = IRB.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8* ConstantInt::get(Int32Ty, -Offset)); Value *NewAI = IRB.CreateBitCast(Off, StackGuardSlot->getType(), "StackGuardSlot"); @@ -569,14 +569,14 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( if (Size == 0) Size = 1; // Don't create zero-sized stack objects. - Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8* + Value *Off = IRB.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8* ConstantInt::get(Int32Ty, -Offset)); Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(), Arg->getName() + ".unsafe-byval"); // Replace alloc with the new location. 
replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB, - DIExpression::NoDeref, -Offset, DIExpression::NoDeref); + DIExpression::ApplyOffset, -Offset); Arg->replaceAllUsesWith(NewArg); IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode()); IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlignment(), Size); @@ -587,12 +587,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( IRB.SetInsertPoint(AI); unsigned Offset = SSL.getObjectOffset(AI); - uint64_t Size = getStaticAllocaAllocationSize(AI); - if (Size == 0) - Size = 1; // Don't create zero-sized stack objects. - - replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::NoDeref, - -Offset, DIExpression::NoDeref); + replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::ApplyOffset, + -Offset); replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset); // Replace uses of the alloca with the new location. @@ -609,20 +605,16 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( InsertBefore = User; IRBuilder<> IRBUser(InsertBefore); - Value *Off = IRBUser.CreateGEP(BasePointer, // BasePointer is i8* + Value *Off = IRBUser.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8* ConstantInt::get(Int32Ty, -Offset)); Value *Replacement = IRBUser.CreateBitCast(Off, AI->getType(), Name); - if (auto *PHI = dyn_cast<PHINode>(User)) { + if (auto *PHI = dyn_cast<PHINode>(User)) // PHI nodes may have multiple incoming edges from the same BB (why??), // all must be updated at once with the same incoming value. - auto *BB = PHI->getIncomingBlock(U); - for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I) - if (PHI->getIncomingBlock(I) == BB) - PHI->setIncomingValue(I, Replacement); - } else { + PHI->setIncomingValueForBlock(PHI->getIncomingBlock(U), Replacement); + else U.set(Replacement); - } } AI->eraseFromParent(); @@ -637,7 +629,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( IRB.SetInsertPoint(BasePointer->getNextNode()); Value *StaticTop = - IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -FrameSize), + IRB.CreateGEP(Int8Ty, BasePointer, ConstantInt::get(Int32Ty, -FrameSize), "unsafe_stack_static_top"); IRB.CreateStore(StaticTop, UnsafeStackPtr); return StaticTop; @@ -660,7 +652,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( uint64_t TySize = DL.getTypeAllocSize(Ty); Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize)); - Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy); + Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(StackPtrTy, UnsafeStackPtr), + IntPtrTy); SP = IRB.CreateSub(SP, Size); // Align the SP value to satisfy the AllocaInst, type and stack alignments. 
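The SafeStack hunks here and above replace the IRBuilder overloads that infer a type from the pointer operand with the explicit-type forms (CreateLoad(Ty, Ptr, ...), CreateGEP(Ty, Ptr, Idx)), which keep working once pointee types can no longer be recovered from pointers. A small self-contained sketch of the pattern used throughout the pass; the helper and its arguments are illustrative, not code from the patch:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Load an EltTy value stored at a (possibly negative) byte offset from an
// i8* base pointer, mirroring the CreateGEP/CreateBitCast/CreateLoad
// sequence in the hunks above.
static Value *loadAtByteOffset(IRBuilder<> &IRB, Type *EltTy, Value *Int8Base,
                               int64_t Offset) {
  // GEP with an explicit i8 element type; the offset is a signed i32 index.
  Value *Addr = IRB.CreateGEP(IRB.getInt8Ty(), Int8Base,
                              ConstantInt::get(IRB.getInt32Ty(), Offset,
                                               /*isSigned=*/true));
  // Bitcast to the element pointer type, then load with an explicit type.
  Value *Typed = IRB.CreateBitCast(Addr, EltTy->getPointerTo());
  return IRB.CreateLoad(EltTy, Typed);
}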
@@ -682,8 +675,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( if (AI->hasName() && isa<Instruction>(NewAI)) NewAI->takeName(AI); - replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::NoDeref, 0, - DIExpression::NoDeref); + replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::ApplyOffset, 0); AI->replaceAllUsesWith(NewAI); AI->eraseFromParent(); } @@ -698,7 +690,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( if (II->getIntrinsicID() == Intrinsic::stacksave) { IRBuilder<> IRB(II); - Instruction *LI = IRB.CreateLoad(UnsafeStackPtr); + Instruction *LI = IRB.CreateLoad(StackPtrTy, UnsafeStackPtr); LI->takeName(II); II->replaceAllUsesWith(LI); II->eraseFromParent(); @@ -727,7 +719,7 @@ void SafeStack::TryInlinePointerAddress() { if (!isa<CallInst>(UnsafeStackPtr)) return; - if(F.hasFnAttribute(Attribute::OptimizeNone)) + if(F.hasOptNone()) return; CallSite CS(UnsafeStackPtr); @@ -783,7 +775,7 @@ bool SafeStack::run() { if (DISubprogram *SP = F.getSubprogram()) IRB.SetCurrentDebugLocation(DebugLoc::get(SP->getScopeLine(), 0, SP)); if (SafeStackUsePointerAddress) { - Value *Fn = F.getParent()->getOrInsertFunction( + FunctionCallee Fn = F.getParent()->getOrInsertFunction( "__safestack_pointer_address", StackPtrTy->getPointerTo(0)); UnsafeStackPtr = IRB.CreateCall(Fn); } else { @@ -793,7 +785,7 @@ bool SafeStack::run() { // Load the current stack pointer (we'll also use it as a base pointer). // FIXME: use a dedicated register for it ? Instruction *BasePointer = - IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr"); + IRB.CreateLoad(StackPtrTy, UnsafeStackPtr, false, "unsafe_stack_ptr"); assert(BasePointer->getType() == StackPtrTy); AllocaInst *StackGuardSlot = nullptr; diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp index 726c38002817..04a5c4b6d892 100644 --- a/lib/CodeGen/SafeStackColoring.cpp +++ b/lib/CodeGen/SafeStackColoring.cpp @@ -1,9 +1,8 @@ //===- SafeStackColoring.cpp - SafeStack frame coloring -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/SafeStackColoring.h b/lib/CodeGen/SafeStackColoring.h index 902e63ebeb7e..b696b1b6baed 100644 --- a/lib/CodeGen/SafeStackColoring.h +++ b/lib/CodeGen/SafeStackColoring.h @@ -1,9 +1,8 @@ //===- SafeStackColoring.h - SafeStack frame coloring ----------*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/SafeStackLayout.cpp b/lib/CodeGen/SafeStackLayout.cpp index 07b6a5d1883b..09964866e4d3 100644 --- a/lib/CodeGen/SafeStackLayout.cpp +++ b/lib/CodeGen/SafeStackLayout.cpp @@ -1,9 +1,8 @@ //===- SafeStackLayout.cpp - SafeStack frame layout -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/SafeStackLayout.h b/lib/CodeGen/SafeStackLayout.h index ac531d800f6e..349d9a8b595c 100644 --- a/lib/CodeGen/SafeStackLayout.h +++ b/lib/CodeGen/SafeStackLayout.h @@ -1,9 +1,8 @@ //===- SafeStackLayout.h - SafeStack frame layout --------------*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index 2684f92b3a93..7776dffb4e9c 100644 --- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -1,10 +1,9 @@ //===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===// // intrinsics // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -124,7 +123,7 @@ static bool isConstantIntVector(Value *Mask) { // %10 = extractelement <16 x i1> %mask, i32 2 // br i1 %10, label %cond.load4, label %else5 // -static void scalarizeMaskedLoad(CallInst *CI) { +static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { Value *Ptr = CI->getArgOperand(0); Value *Alignment = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); @@ -144,7 +143,7 @@ static void scalarizeMaskedLoad(CallInst *CI) { // Short-cut if the mask is all-true. if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { - Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal); + Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal); CI->replaceAllUsesWith(NewI); CI->eraseFromParent(); return; @@ -152,9 +151,9 @@ static void scalarizeMaskedLoad(CallInst *CI) { // Adjust alignment for the scalar instruction.
AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); - // Bitcast %addr fron i8* to EltTy* + // Bitcast %addr from i8* to EltTy* Type *NewPtrType = - EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); + EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); unsigned VectorWidth = VecType->getNumElements(); @@ -165,11 +164,9 @@ static void scalarizeMaskedLoad(CallInst *CI) { for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) continue; - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); - VResult = - Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); + Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); + LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal); + VResult = Builder.CreateInsertElement(VResult, Load, Idx); } CI->replaceAllUsesWith(VResult); CI->eraseFromParent(); @@ -184,8 +181,7 @@ static void scalarizeMaskedLoad(CallInst *CI) { // br i1 %mask_1, label %cond.load, label %else // - Value *Predicate = - Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); + Value *Predicate = Builder.CreateExtractElement(Mask, Idx); // Create "cond" block // @@ -197,11 +193,9 @@ static void scalarizeMaskedLoad(CallInst *CI) { "cond.load"); Builder.SetInsertPoint(InsertPt); - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); - Value *NewVResult = Builder.CreateInsertElement(VResult, Load, - Builder.getInt32(Idx)); + Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); + LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal); + Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); // Create "else" block, fill it in the next iteration BasicBlock *NewIfBlock = @@ -222,6 +216,8 @@ static void scalarizeMaskedLoad(CallInst *CI) { CI->replaceAllUsesWith(VResult); CI->eraseFromParent(); + + ModifiedDT = true; } // Translate a masked store intrinsic, like @@ -250,7 +246,7 @@ static void scalarizeMaskedLoad(CallInst *CI) { // store i32 %6, i32* %7 // br label %else2 // . . . -static void scalarizeMaskedStore(CallInst *CI) { +static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { Value *Src = CI->getArgOperand(0); Value *Ptr = CI->getArgOperand(1); Value *Alignment = CI->getArgOperand(2); @@ -276,9 +272,9 @@ static void scalarizeMaskedStore(CallInst *CI) { // Adjust alignment for the scalar instruction. 
AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); - // Bitcast %addr fron i8* to EltTy* + // Bitcast %addr from i8* to EltTy* Type *NewPtrType = - EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); + EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); unsigned VectorWidth = VecType->getNumElements(); @@ -286,9 +282,8 @@ static void scalarizeMaskedStore(CallInst *CI) { for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) continue; - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + Value *OneElt = Builder.CreateExtractElement(Src, Idx); + Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); Builder.CreateAlignedStore(OneElt, Gep, AlignVal); } CI->eraseFromParent(); @@ -301,8 +296,7 @@ static void scalarizeMaskedStore(CallInst *CI) { // %mask_1 = extractelement <16 x i1> %mask, i32 Idx // br i1 %mask_1, label %cond.store, label %else // - Value *Predicate = - Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); + Value *Predicate = Builder.CreateExtractElement(Mask, Idx); // Create "cond" block // @@ -314,9 +308,8 @@ static void scalarizeMaskedStore(CallInst *CI) { IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); Builder.SetInsertPoint(InsertPt); - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + Value *OneElt = Builder.CreateExtractElement(Src, Idx); + Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); Builder.CreateAlignedStore(OneElt, Gep, AlignVal); // Create "else" block, fill it in the next iteration @@ -329,6 +322,8 @@ static void scalarizeMaskedStore(CallInst *CI) { IfBlock = NewIfBlock; } CI->eraseFromParent(); + + ModifiedDT = true; } // Translate a masked gather intrinsic like @@ -360,13 +355,14 @@ static void scalarizeMaskedStore(CallInst *CI) { // . . . 
// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src // ret <16 x i32> %Result -static void scalarizeMaskedGather(CallInst *CI) { +static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) { Value *Ptrs = CI->getArgOperand(0); Value *Alignment = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); Value *Src0 = CI->getArgOperand(3); VectorType *VecType = cast<VectorType>(CI->getType()); + Type *EltTy = VecType->getElementType(); IRBuilder<> Builder(CI->getContext()); Instruction *InsertPt = CI; @@ -385,12 +381,11 @@ static void scalarizeMaskedGather(CallInst *CI) { for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) continue; - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); LoadInst *Load = - Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx)); - VResult = Builder.CreateInsertElement( - VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx)); + Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); + VResult = + Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); } CI->replaceAllUsesWith(VResult); CI->eraseFromParent(); @@ -404,8 +399,8 @@ static void scalarizeMaskedGather(CallInst *CI) { // br i1 %Mask1, label %cond.load, label %else // - Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx), - "Mask" + Twine(Idx)); + Value *Predicate = + Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); // Create "cond" block // @@ -416,13 +411,11 @@ static void scalarizeMaskedGather(CallInst *CI) { BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); Builder.SetInsertPoint(InsertPt); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); LoadInst *Load = - Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx)); - Value *NewVResult = Builder.CreateInsertElement(VResult, Load, - Builder.getInt32(Idx), - "Res" + Twine(Idx)); + Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); + Value *NewVResult = + Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); // Create "else" block, fill it in the next iteration BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); @@ -441,6 +434,8 @@ static void scalarizeMaskedGather(CallInst *CI) { CI->replaceAllUsesWith(VResult); CI->eraseFromParent(); + + ModifiedDT = true; } // Translate a masked scatter intrinsic, like @@ -469,7 +464,7 @@ static void scalarizeMaskedGather(CallInst *CI) { // store i32 %Elt1, i32* %Ptr1, align 4 // br label %else2 // . . . -static void scalarizeMaskedScatter(CallInst *CI) { +static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) { Value *Src = CI->getArgOperand(0); Value *Ptrs = CI->getArgOperand(1); Value *Alignment = CI->getArgOperand(2); @@ -493,12 +488,11 @@ static void scalarizeMaskedScatter(CallInst *CI) { // Shorten the way if the mask is a vector of constants. 
if (isConstantIntVector(Mask)) { for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue()) + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) continue; - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), - "Elt" + Twine(Idx)); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); + Value *OneElt = + Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); } CI->eraseFromParent(); @@ -511,8 +505,8 @@ static void scalarizeMaskedScatter(CallInst *CI) { // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx // br i1 %Mask1, label %cond.store, label %else // - Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx), - "Mask" + Twine(Idx)); + Value *Predicate = + Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); // Create "cond" block // @@ -523,10 +517,8 @@ static void scalarizeMaskedScatter(CallInst *CI) { BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); Builder.SetInsertPoint(InsertPt); - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), - "Elt" + Twine(Idx)); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); + Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); // Create "else" block, fill it in the next iteration @@ -538,6 +530,156 @@ static void scalarizeMaskedScatter(CallInst *CI) { IfBlock = NewIfBlock; } CI->eraseFromParent(); + + ModifiedDT = true; +} + +static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { + Value *Ptr = CI->getArgOperand(0); + Value *Mask = CI->getArgOperand(1); + Value *PassThru = CI->getArgOperand(2); + + VectorType *VecType = cast<VectorType>(CI->getType()); + + Type *EltTy = VecType->getElementType(); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + unsigned VectorWidth = VecType->getNumElements(); + + // The result vector + Value *VResult = PassThru; + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] + // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // br i1 %mask_1, label %cond.load, label %else + // + + Value *Predicate = + Builder.CreateExtractElement(Mask, Idx); + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), + "cond.load"); + Builder.SetInsertPoint(InsertPt); + + LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, 1); + Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); + + // Move the pointer if there are more blocks to come. 
+ Value *NewPtr; + if ((Idx + 1) != VectorWidth) + NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); + OldBr->eraseFromParent(); + BasicBlock *PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + + // Create the phi to join the new and previous value. + PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + ResultPhi->addIncoming(NewVResult, CondBlock); + ResultPhi->addIncoming(VResult, PrevIfBlock); + VResult = ResultPhi; + + // Add a PHI for the pointer if this isn't the last iteration. + if ((Idx + 1) != VectorWidth) { + PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); + PtrPhi->addIncoming(NewPtr, CondBlock); + PtrPhi->addIncoming(Ptr, PrevIfBlock); + Ptr = PtrPhi; + } + } + + CI->replaceAllUsesWith(VResult); + CI->eraseFromParent(); + + ModifiedDT = true; +} + +static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { + Value *Src = CI->getArgOperand(0); + Value *Ptr = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + + VectorType *VecType = cast<VectorType>(Src->getType()); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + Type *EltTy = VecType->getVectorElementType(); + + unsigned VectorWidth = VecType->getNumElements(); + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // br i1 %mask_1, label %cond.store, label %else + // + Value *Predicate = Builder.CreateExtractElement(Mask, Idx); + + // Create "cond" block + // + // %OneElt = extractelement <16 x i32> %Src, i32 Idx + // %EltAddr = getelementptr i32* %1, i32 0 + // %store i32 %OneElt, i32* %EltAddr + // + BasicBlock *CondBlock = + IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Idx); + Builder.CreateAlignedStore(OneElt, Ptr, 1); + + // Move the pointer if there are more blocks to come. + Value *NewPtr; + if ((Idx + 1) != VectorWidth) + NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); + OldBr->eraseFromParent(); + BasicBlock *PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + + // Add a PHI for the pointer if this isn't the last iteration. 
+ if ((Idx + 1) != VectorWidth) { + PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); + PtrPhi->addIncoming(NewPtr, CondBlock); + PtrPhi->addIncoming(Ptr, PrevIfBlock); + Ptr = PtrPhi; + } + } + CI->eraseFromParent(); + + ModifiedDT = true; } bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) { @@ -587,33 +729,35 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, break; case Intrinsic::masked_load: // Scalarize unsupported vector masked load - if (!TTI->isLegalMaskedLoad(CI->getType())) { - scalarizeMaskedLoad(CI); - ModifiedDT = true; - return true; - } - return false; + if (TTI->isLegalMaskedLoad(CI->getType())) + return false; + scalarizeMaskedLoad(CI, ModifiedDT); + return true; case Intrinsic::masked_store: - if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { - scalarizeMaskedStore(CI); - ModifiedDT = true; - return true; - } - return false; + if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) + return false; + scalarizeMaskedStore(CI, ModifiedDT); + return true; case Intrinsic::masked_gather: - if (!TTI->isLegalMaskedGather(CI->getType())) { - scalarizeMaskedGather(CI); - ModifiedDT = true; - return true; - } - return false; + if (TTI->isLegalMaskedGather(CI->getType())) + return false; + scalarizeMaskedGather(CI, ModifiedDT); + return true; case Intrinsic::masked_scatter: - if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { - scalarizeMaskedScatter(CI); - ModifiedDT = true; - return true; - } - return false; + if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) + return false; + scalarizeMaskedScatter(CI, ModifiedDT); + return true; + case Intrinsic::masked_expandload: + if (TTI->isLegalMaskedExpandLoad(CI->getType())) + return false; + scalarizeMaskedExpandLoad(CI, ModifiedDT); + return true; + case Intrinsic::masked_compressstore: + if (TTI->isLegalMaskedCompressStore(CI->getArgOperand(0)->getType())) + return false; + scalarizeMaskedCompressStore(CI, ModifiedDT); + return true; } } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 6c135b3d69d6..dc3a11670a16 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -1,9 +1,8 @@ //===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,6 +14,7 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" @@ -38,6 +38,10 @@ using namespace llvm; #define DEBUG_TYPE "pre-RA-sched" +STATISTIC(NumNewPredsAdded, "Number of times a single predecessor was added"); +STATISTIC(NumTopoInits, + "Number of times the topological order has been recomputed"); + #ifndef NDEBUG static cl::opt<bool> StressSchedOpt( "stress-sched", cl::Hidden, cl::init(false), @@ -458,6 +462,11 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { // On insertion of the edge X->Y, the algorithm first marks by calling DFS // the nodes reachable from Y, and then shifts them using Shift to lie // immediately after X in Index2Node. + + // Cancel pending updates, mark as valid. + Dirty = false; + Updates.clear(); + unsigned DAGSize = SUnits.size(); std::vector<SUnit*> WorkList; WorkList.reserve(DAGSize); @@ -498,6 +507,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { } Visited.resize(DAGSize); + NumTopoInits++; #ifndef NDEBUG // Check correctness of the ordering @@ -510,6 +520,31 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { #endif } +void ScheduleDAGTopologicalSort::FixOrder() { + // Recompute from scratch after new nodes have been added. + if (Dirty) { + InitDAGTopologicalSorting(); + return; + } + + // Otherwise apply updates one-by-one. + for (auto &U : Updates) + AddPred(U.first, U.second); + Updates.clear(); +} + +void ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) { + // Recomputing the order from scratch is likely more efficient than applying + // updates one-by-one for too many updates. The current cut-off is arbitrarily + // chosen. + Dirty = Dirty || Updates.size() > 10; + + if (Dirty) + return; + + Updates.emplace_back(Y, X); +} + void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { int UpperBound, LowerBound; LowerBound = Node2Index[Y->NodeNum]; @@ -524,6 +559,8 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { // Recompute topological indexes. Shift(Visited, LowerBound, UpperBound); } + + NumNewPredsAdded++; } void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { @@ -665,6 +702,7 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, } bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) { + FixOrder(); // Is SU reachable from TargetSU via successor edges? if (IsReachable(SU, TargetSU)) return true; @@ -677,6 +715,7 @@ bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) { bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, const SUnit *TargetSU) { + FixOrder(); // If insertion of the edge SU->TargetSU would create a cycle // then there is a path from TargetSU to SU. 
int UpperBound, LowerBound; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 99406ed1496a..d5ad7e92299d 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1,9 +1,8 @@ //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -115,7 +114,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), RemoveKillFlags(RemoveKillFlags), UnknownValue(UndefValue::get( - Type::getVoidTy(mf.getFunction().getContext()))) { + Type::getVoidTy(mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) { DbgValues.clear(); const TargetSubtargetInfo &ST = mf.getSubtarget(); @@ -132,7 +131,8 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const DataLayout &DL) { auto allMMOsOkay = [&]() { for (const MachineMemOperand *MMO : MI->memoperands()) { - if (MMO->isVolatile()) + // TODO: Figure out whether isAtomic is really necessary (see D57601). + if (MMO->isVolatile() || MMO->isAtomic()) return false; if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) { @@ -743,6 +743,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // done. Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/); + // Track all instructions that may raise floating-point exceptions. + // These do not depend on one another (or normal loads or stores), but + // must not be rescheduled across global barriers. Note that we don't + // really need a "map" here since we don't track those MIs by value; + // using the same Value2SUsMap data type here is simply a matter of + // convenience. + Value2SUsMap FPExceptions; + // Remove any stale debug info; sometimes BuildSchedGraph is called again // without emitting the info from the previous call. DbgValues.clear(); @@ -870,10 +878,26 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addBarrierChain(Loads); addBarrierChain(NonAliasStores); addBarrierChain(NonAliasLoads); + addBarrierChain(FPExceptions); continue; } + // Instructions that may raise FP exceptions may not be moved + // across any global barriers. + if (MI.mayRaiseFPException()) { + if (BarrierChain) + BarrierChain->addPredBarrier(SU); + + FPExceptions.insert(SU, UnknownValue); + + if (FPExceptions.size() >= HugeRegion) { + LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";); + Value2SUsMap empty; + reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize()); + } + } + // If it's not a store or a variant load, we're done.
if (!MI.mayStore() && !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA))) continue; @@ -968,6 +992,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Uses.clear(); CurrentVRegDefs.clear(); CurrentVRegUses.clear(); + + Topo.MarkDirty(); } raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) { @@ -1089,22 +1115,21 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { if (!MI.isBundled()) { toggleKills(MRI, LiveRegs, MI, true); } else { - MachineBasicBlock::instr_iterator First = MI.getIterator(); - if (MI.isBundle()) { + MachineBasicBlock::instr_iterator Bundle = MI.getIterator(); + if (MI.isBundle()) toggleKills(MRI, LiveRegs, MI, false); - ++First; - } + // Some targets make the (questionable) assumption that the instructions // inside the bundle are ordered and consequently only the last use of // a register inside the bundle can kill it. - MachineBasicBlock::instr_iterator I = std::next(First); + MachineBasicBlock::instr_iterator I = std::next(Bundle); while (I->isBundledWithSucc()) ++I; do { if (!I->isDebugInstr()) toggleKills(MRI, LiveRegs, *I, true); --I; - } while(I != First); + } while (I != Bundle); } } } @@ -1146,6 +1171,23 @@ std::string ScheduleDAGInstrs::getDAGName() const { return "dag." + BB->getFullName(); } +bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { + return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU); +} + +bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) { + if (SuccSU != &ExitSU) { + // Do not use WillCreateCycle, it assumes SD scheduling. + // If Pred is reachable from Succ, then the edge creates a cycle. + if (Topo.IsReachable(PredDep.getSUnit(), SuccSU)) + return false; + Topo.AddPredQueued(SuccSU, PredDep.getSUnit()); + } + SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial()); + // Return true regardless of whether a new edge needed to be inserted. + return true; +} + //===----------------------------------------------------------------------===// // SchedDFSResult Implementation //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index ff2085aae865..8d04711f07c6 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -1,9 +1,8 @@ //===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 4301372179b8..a9fda56f2dac 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -1,9 +1,8 @@ //===- ScoreboardHazardRecognizer.cpp - Scheduler Support -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ff5505c97721..49c922f560fa 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1,9 +1,8 @@ //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -112,6 +111,10 @@ static cl::opt<bool> MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads")); +static cl::opt<unsigned> TokenFactorInlineLimit( + "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), + cl::desc("Limit the number of operands to inline for Token Factors")); + namespace { class DAGCombiner { @@ -138,6 +141,10 @@ namespace { /// them) when they are deleted from the underlying DAG. It relies on /// stable indices of nodes within the worklist. DenseMap<SDNode *, unsigned> WorklistMap; + /// This records all nodes attempted to be added to the worklist since we + /// last considered a new worklist entry. Since we do not add duplicate + /// nodes to the worklist, this is different from the tail of the worklist. + SmallSetVector<SDNode *, 32> PruningList; /// Set of nodes which have been combined (at least once). /// @@ -155,6 +162,37 @@ namespace { AddToWorklist(Node); } + // Prune potentially dangling nodes. This is called after + // any visit to a node, but should also be called during a visit after any + // failed combine which may have created a DAG node. + void clearAddedDanglingWorklistEntries() { + // Check any nodes added to the worklist to see if they are prunable. + while (!PruningList.empty()) { + auto *N = PruningList.pop_back_val(); + if (N->use_empty()) + recursivelyDeleteUnusedNodes(N); + } + } + + SDNode *getNextWorklistEntry() { + // Before we do any work, remove nodes that are not in use. + clearAddedDanglingWorklistEntries(); + SDNode *N = nullptr; + // The Worklist holds the SDNodes in order, but it may contain null + // entries. + while (!N && !Worklist.empty()) { + N = Worklist.pop_back_val(); + } + + if (N) { + bool GoodWorklistEntry = WorklistMap.erase(N); + (void)GoodWorklistEntry; + assert(GoodWorklistEntry && + "Found a worklist entry without a corresponding map entry!"); + } + return N; + } + /// Call the node-specific routine that folds each particular type of node. SDValue visit(SDNode *N); @@ -162,7 +200,7 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) { - ForCodeSize = DAG.getMachineFunction().getFunction().optForSize(); + ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); MaximumLegalStoreInBits = 0; for (MVT VT : MVT::all_valuetypes()) @@ -172,6 +210,11 @@ namespace { MaximumLegalStoreInBits = VT.getSizeInBits(); } + void ConsiderForPruning(SDNode *N) { + // Mark this for potential pruning.
+ PruningList.insert(N); + } + /// Add to the worklist making sure its instance is at the back (next to be /// processed.) void AddToWorklist(SDNode *N) { @@ -183,6 +226,8 @@ namespace { if (N->getOpcode() == ISD::HANDLENODE) return; + ConsiderForPruning(N); + if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) Worklist.push_back(N); } @@ -190,6 +235,7 @@ namespace { /// Remove all instances of N from the worklist. void removeFromWorklist(SDNode *N) { CombinedNodes.erase(N); + PruningList.remove(N); auto It = WorklistMap.find(N); if (It == WorklistMap.end()) @@ -229,8 +275,15 @@ namespace { /// If so, return true. bool SimplifyDemandedBits(SDValue Op) { unsigned BitWidth = Op.getScalarValueSizeInBits(); - APInt Demanded = APInt::getAllOnesValue(BitWidth); - return SimplifyDemandedBits(Op, Demanded); + APInt DemandedBits = APInt::getAllOnesValue(BitWidth); + return SimplifyDemandedBits(Op, DemandedBits); + } + + bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) { + EVT VT = Op.getValueType(); + unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1; + APInt DemandedElts = APInt::getAllOnesValue(NumElts); + return SimplifyDemandedBits(Op, DemandedBits, DemandedElts); } /// Check the specified vector node value to see if it can be simplified or @@ -238,12 +291,13 @@ namespace { /// elements. If so, return true. bool SimplifyDemandedVectorElts(SDValue Op) { unsigned NumElts = Op.getValueType().getVectorNumElements(); - APInt Demanded = APInt::getAllOnesValue(NumElts); - return SimplifyDemandedVectorElts(Op, Demanded); + APInt DemandedElts = APInt::getAllOnesValue(NumElts); + return SimplifyDemandedVectorElts(Op, DemandedElts); } - bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); - bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded, + bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts); + bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, bool AssumeSingleUse = false); bool CombineToPreIndexedLoadStore(SDNode *N); @@ -291,15 +345,16 @@ namespace { SDValue visitTokenFactor(SDNode *N); SDValue visitMERGE_VALUES(SDNode *N); SDValue visitADD(SDNode *N); - SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference); + SDValue visitADDLike(SDNode *N); + SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitSUB(SDNode *N); SDValue visitADDSAT(SDNode *N); SDValue visitSUBSAT(SDNode *N); SDValue visitADDC(SDNode *N); - SDValue visitUADDO(SDNode *N); + SDValue visitADDO(SDNode *N); SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitSUBC(SDNode *N); - SDValue visitUSUBO(SDNode *N); + SDValue visitSUBO(SDNode *N); SDValue visitADDE(SDNode *N); SDValue visitADDCARRY(SDNode *N); SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N); @@ -316,8 +371,7 @@ namespace { SDValue visitMULHS(SDNode *N); SDValue visitSMUL_LOHI(SDNode *N); SDValue visitUMUL_LOHI(SDNode *N); - SDValue visitSMULO(SDNode *N); - SDValue visitUMULO(SDNode *N); + SDValue visitMULO(SDNode *N); SDValue visitIMINMAX(SDNode *N); SDValue visitAND(SDNode *N); SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N); @@ -386,6 +440,7 @@ namespace { SDValue replaceStoreOfFPConstant(StoreSDNode *ST); SDValue visitSTORE(SDNode *N); + SDValue visitLIFETIME_END(SDNode *N); SDValue visitINSERT_VECTOR_ELT(SDNode *N); SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); SDValue visitBUILD_VECTOR(SDNode *N); @@ -400,13 +455,19 @@ namespace { SDValue 
visitMSCATTER(SDNode *N); SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); + SDValue visitVECREDUCE(SDNode *N); SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); SDValue visitFMULForFMADistributiveCombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); - SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, + bool reassociationCanBreakAddressingModePattern(unsigned Opc, + const SDLoc &DL, SDValue N0, + SDValue N1); + SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0, + SDValue N1); + SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags); SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); @@ -466,6 +527,7 @@ namespace { const SDLoc &DL); SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue MatchLoadCombine(SDNode *N); + SDValue MatchStoreCombine(StoreSDNode *N); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue splitMergedValStore(StoreSDNode *ST); @@ -475,7 +537,8 @@ namespace { SDValue reduceBuildVecToShuffle(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, - SDValue VecIn2, unsigned LeftIdx); + SDValue VecIn2, unsigned LeftIdx, + bool DidSplitVec); SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast); /// Walk up chain skipping non-aliasing memory nodes, @@ -484,7 +547,7 @@ namespace { SmallVectorImpl<SDValue> &Aliases); /// Return true if there is any possibility that the two addresses overlap. - bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; + bool isAlias(SDNode *Op0, SDNode *Op1) const; /// Walk up chain skipping non-aliasing memory nodes, looking for a better /// chain (aliasing node.) @@ -642,6 +705,18 @@ public: } }; +class WorklistInserter : public SelectionDAG::DAGUpdateListener { + DAGCombiner &DC; + +public: + explicit WorklistInserter(DAGCombiner &dc) + : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} + + // FIXME: Ideally we could add N to the worklist, but this causes exponential + // compile time costs in large DAGs, e.g. Halide. + void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); } +}; + } // end anonymous namespace //===----------------------------------------------------------------------===// @@ -697,20 +772,23 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) { static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, + bool ForCodeSize, unsigned Depth = 0) { // fneg is removable even if it has multiple uses. - if (Op.getOpcode() == ISD::FNEG) return 2; + if (Op.getOpcode() == ISD::FNEG) + return 2; // Don't allow anything with multiple uses unless we know it is free. EVT VT = Op.getValueType(); const SDNodeFlags Flags = Op->getFlags(); - if (!Op.hasOneUse()) - if (!(Op.getOpcode() == ISD::FP_EXTEND && - TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) - return 0; + if (!Op.hasOneUse() && + !(Op.getOpcode() == ISD::FP_EXTEND && + TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) + return 0; // Don't recurse exponentially. - if (Depth > 6) return 0; + if (Depth > 6) + return 0; switch (Op.getOpcode()) { default: return false; @@ -721,7 +799,25 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, // Don't invert constant FP values after legalization unless the target says // the negated constant is legal. 
return TLI.isOperationLegal(ISD::ConstantFP, VT) || - TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT); + TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT, + ForCodeSize); + } + case ISD::BUILD_VECTOR: { + // Only permit BUILD_VECTOR of constants. + if (llvm::any_of(Op->op_values(), [&](SDValue N) { + return !N.isUndef() && !isa<ConstantFPSDNode>(N); + })) + return 0; + if (!LegalOperations) + return 1; + if (TLI.isOperationLegal(ISD::ConstantFP, VT) && + TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return 1; + return llvm::all_of(Op->op_values(), [&](SDValue N) { + return N.isUndef() || + TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT, + ForCodeSize); + }); } case ISD::FADD: if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) @@ -733,15 +829,14 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, - Options, Depth + 1)) + Options, ForCodeSize, Depth + 1)) return V; // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, - Depth + 1); + ForCodeSize, Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options->NoSignedZerosFPMath && - !Flags.hasNoSignedZeros()) + if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) @@ -751,30 +846,31 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, case ISD::FDIV: // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, - Options, Depth + 1)) + Options, ForCodeSize, Depth + 1)) return V; return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, - Depth + 1); + ForCodeSize, Depth + 1); case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FSIN: return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, - Depth + 1); + ForCodeSize, Depth + 1); } } /// If isNegatibleForFree returns true, return the newly negated expression. static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, unsigned Depth = 0) { - const TargetOptions &Options = DAG.getTarget().Options; + bool LegalOperations, bool ForCodeSize, + unsigned Depth = 0) { // fneg is removable even if it has multiple uses. 
- if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); + if (Op.getOpcode() == ISD::FNEG) + return Op.getOperand(0); assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); - - const SDNodeFlags Flags = Op.getNode()->getFlags(); + const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags Flags = Op->getFlags(); switch (Op.getOpcode()) { default: llvm_unreachable("Unknown code"); @@ -783,24 +879,41 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, V.changeSign(); return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); } + case ISD::BUILD_VECTOR: { + SmallVector<SDValue, 4> Ops; + for (SDValue C : Op->op_values()) { + if (C.isUndef()) { + Ops.push_back(C); + continue; + } + APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF(); + V.changeSign(); + Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); + } + return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); + } case ISD::FADD: assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros()); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), &Options, Depth+1)) + DAG.getTargetLoweringInfo(), &Options, ForCodeSize, + Depth + 1)) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, Depth+1), + LegalOperations, ForCodeSize, + Depth + 1), Op.getOperand(1), Flags); // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, Depth+1), + LegalOperations, ForCodeSize, + Depth + 1), Op.getOperand(0), Flags); case ISD::FSUB: // fold (fneg (fsub 0, B)) -> B - if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) + if (ConstantFPSDNode *N0CFP = + isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) if (N0CFP->isZero()) return Op.getOperand(1); @@ -812,28 +925,33 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::FDIV: // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), &Options, Depth+1)) + DAG.getTargetLoweringInfo(), &Options, ForCodeSize, + Depth + 1)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, Depth+1), + LegalOperations, ForCodeSize, + Depth + 1), Op.getOperand(1), Flags); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, Depth+1), Flags); + LegalOperations, ForCodeSize, + Depth + 1), Flags); case ISD::FP_EXTEND: case ISD::FSIN: return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, Depth+1)); + LegalOperations, ForCodeSize, + Depth + 1)); case ISD::FP_ROUND: - return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, Depth+1), - Op.getOperand(1)); + return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1)); } } @@ -924,53 +1042,113 @@ static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) { ISD::isBuildVectorOfConstantFPSDNodes(V.getNode()); } -SDValue 
DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, - SDValue N1, SDNodeFlags Flags) { - // Don't reassociate reductions. - if (Flags.hasVectorReduction()) - return SDValue(); +bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, + const SDLoc &DL, + SDValue N0, + SDValue N1) { + // Currently this only tries to ensure we don't undo the GEP splits done by + // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this, + // we check if the following transformation would be problematic: + // (load/store (add, (add, x, offset1), offset2)) -> + // (load/store (add, x, offset1+offset2)). - EVT VT = N0.getValueType(); - if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) { - if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { - // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); - return SDValue(); - } - if (N0.hasOneUse()) { - // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one - // use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); - if (!OpNode.getNode()) - return SDValue(); - AddToWorklist(OpNode.getNode()); - return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); - } + if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD) + return false; + + if (N0.hasOneUse()) + return false; + + auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + auto *C2 = dyn_cast<ConstantSDNode>(N1); + if (!C1 || !C2) + return false; + + const APInt &C1APIntVal = C1->getAPIntValue(); + const APInt &C2APIntVal = C2->getAPIntValue(); + if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64) + return false; + + const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal; + if (CombinedValueIntVal.getBitWidth() > 64) + return false; + const int64_t CombinedValue = CombinedValueIntVal.getSExtValue(); + + for (SDNode *Node : N0->uses()) { + auto LoadStore = dyn_cast<MemSDNode>(Node); + if (LoadStore) { + // Is x[offset2] already not a legal addressing mode? If so then + // reassociating the constants breaks nothing (we test offset2 because + // that's the one we hope to fold into the load or store). + TargetLoweringBase::AddrMode AM; + AM.HasBaseReg = true; + AM.BaseOffs = C2APIntVal.getSExtValue(); + EVT VT = LoadStore->getMemoryVT(); + unsigned AS = LoadStore->getAddressSpace(); + Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); + if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) + continue; + + // Would x[offset1+offset2] still be a legal addressing mode? + AM.BaseOffs = CombinedValue; + if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) + return true; } } - if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) { - if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { - if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) { - // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) - return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + return false; +} + +// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression +// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc. 
+SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, + SDValue N0, SDValue N1) { + EVT VT = N0.getValueType(); + + if (N0.getOpcode() != Opc) + return SDValue(); + + // Don't reassociate reductions. + if (N0->getFlags().hasVectorReduction()) + return SDValue(); + + if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2)) + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + return SDValue(); + } + if (N0.hasOneUse()) { + // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) + // iff (op x, c1) has one use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); + if (!OpNode.getNode()) return SDValue(); - } - if (N1.hasOneUse()) { - // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one - // use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0)); - if (!OpNode.getNode()) - return SDValue(); - AddToWorklist(OpNode.getNode()); - return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); - } + AddToWorklist(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); } } + return SDValue(); +} +// Try to reassociate commutative binops. +SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, + SDValue N1, SDNodeFlags Flags) { + assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative."); + // Don't reassociate reductions. + if (Flags.hasVectorReduction()) + return SDValue(); + + // Floating-point reassociation is not allowed without loose FP math. + if (N0.getValueType().isFloatingPoint() || + N1.getValueType().isFloatingPoint()) + if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros()) + return SDValue(); + + if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1)) + return Combined; + if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0)) + return Combined; return SDValue(); } @@ -1026,10 +1204,11 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// Check the specified integer node value to see if it can be simplified or if /// things it uses can be simplified by bit propagation. If so, return true. -bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { +bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); KnownBits Known; - if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO)) + if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO)) return false; // Revisit the node. @@ -1048,12 +1227,13 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { /// Check the specified vector node value to see if it can be simplified or /// if things it uses can be simplified as it only uses some of the elements. /// If so, return true. 
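The demanded-bits and demanded-elements hooks used below share one principle: an operation may be simplified if the simplification is invisible in the bits (or lanes) that are actually observed. A minimal sketch in plain C++ (not the TargetLowering API):

#include <cassert>
#include <cstdint>

// If only DemandedBits of a value are ever read, (x & Mask) behaves exactly
// like x whenever Mask covers every demanded bit, so the AND is removable.
int main() {
  const uint32_t DemandedBits = 0x000000FFu; // user reads the low byte only
  const uint32_t Mask = 0x0000FFFFu;         // covers every demanded bit
  for (uint32_t x = 0; x < 100000; ++x)
    assert(((x & Mask) & DemandedBits) == (x & DemandedBits));
  return 0;
}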
-bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded, +bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, + const APInt &DemandedElts, bool AssumeSingleUse) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownUndef, KnownZero; - if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO, - 0, AssumeSingleUse)) + if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, + TLO, 0, AssumeSingleUse)) return false; // Revisit the node. @@ -1383,6 +1563,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; + WorklistInserter AddNodes(*this); + // Add all the dag nodes to the worklist. for (SDNode &Node : DAG.allnodes()) AddToWorklist(&Node); @@ -1392,19 +1574,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // changes of the root. HandleSDNode Dummy(DAG.getRoot()); - // While the worklist isn't empty, find a node and try to combine it. - while (!WorklistMap.empty()) { - SDNode *N; - // The Worklist holds the SDNodes in order, but it may contain null entries. - do { - N = Worklist.pop_back_val(); - } while (!N); - - bool GoodWorklistEntry = WorklistMap.erase(N); - (void)GoodWorklistEntry; - assert(GoodWorklistEntry && - "Found a worklist entry without a corresponding map entry!"); - + // While we have a valid worklist entry node, try to combine it. + while (SDNode *N = getNextWorklistEntry()) { // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a // reduced number of uses, allowing other xforms. @@ -1493,9 +1664,11 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SSUBSAT: case ISD::USUBSAT: return visitSUBSAT(N); case ISD::ADDC: return visitADDC(N); - case ISD::UADDO: return visitUADDO(N); + case ISD::SADDO: + case ISD::UADDO: return visitADDO(N); case ISD::SUBC: return visitSUBC(N); - case ISD::USUBO: return visitUSUBO(N); + case ISD::SSUBO: + case ISD::USUBO: return visitSUBO(N); case ISD::ADDE: return visitADDE(N); case ISD::ADDCARRY: return visitADDCARRY(N); case ISD::SUBE: return visitSUBE(N); @@ -1509,8 +1682,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MULHS: return visitMULHS(N); case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); - case ISD::SMULO: return visitSMULO(N); - case ISD::UMULO: return visitUMULO(N); + case ISD::SMULO: + case ISD::UMULO: return visitMULO(N); case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -1590,8 +1763,22 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MLOAD: return visitMLOAD(N); case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: return visitMSTORE(N); + case ISD::LIFETIME_END: return visitLIFETIME_END(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N); } return SDValue(); } @@ -1644,7 +1831,7 @@ SDValue DAGCombiner::combine(SDNode *N) { } } - // If N is a commutative binary node, try eliminate it if the commuted + // If N is a commutative binary node, try to eliminate it 
if the commuted // version is already present in the DAG. if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) && N->getNumValues() == 1) { @@ -1693,6 +1880,12 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { if (OptLevel == CodeGenOpt::None) return SDValue(); + // If the sole user is a token factor, we should make sure we have a + // chance to merge them together. This prevents TF chains from inhibiting + // optimizations. + if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor) + AddToWorklist(*(N->use_begin())); + SmallVector<SDNode *, 8> TFs; // List of token factors to visit. SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. SmallPtrSet<SDNode*, 16> SeenOps; @@ -1704,8 +1897,19 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // Iterate through token factors. The TFs list grows when new token factors are // encountered. for (unsigned i = 0; i < TFs.size(); ++i) { - SDNode *TF = TFs[i]; + // Limit number of nodes to inline, to avoid quadratic compile times. + // We have to add the outstanding Token Factors to Ops, otherwise we might + // drop Ops from the resulting Token Factors. + if (Ops.size() > TokenFactorInlineLimit) { + for (unsigned j = i; j < TFs.size(); j++) + Ops.emplace_back(TFs[j], 0); + // Drop unprocessed Token Factors from TFs, so we do not add them to the + // combiner worklist later. + TFs.resize(i); + break; + } + SDNode *TF = TFs[i]; // Check each of the operands. for (const SDValue &Op : TF->op_values()) { switch (Op.getOpcode()) { @@ -1719,8 +1923,6 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) { // Queue up for processing. TFs.push_back(Op.getNode()); - // Clean up in case the token factor is removed. - AddToWorklist(Op.getNode()); Changed = true; break; } @@ -1737,6 +1939,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } } + // Re-visit inlined Token Factors, to clean them up in case they have been + // removed. Skip the first Token Factor, as this is the current node. + for (unsigned i = 1, e = TFs.size(); i < e; i++) + AddToWorklist(TFs[i]); + // Remove Nodes that are chained to another node in the list. Do so // by walking up chains breadth-first, stopping when we've seen // another operand. In general we must climb to the EntryNode, but we can exit @@ -1803,6 +2010,8 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { for (const SDValue &Op : CurNode->op_values()) AddToWorklist(i, Op.getNode(), CurOpNumber); break; + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: case ISD::CopyFromReg: case ISD::CopyToReg: AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber); @@ -1831,9 +2040,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { if (SeenChains.count(Op.getNode()) == 0) PrunedOps.push_back(Op); } - Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps); + Result = DAG.getTokenFactor(SDLoc(N), PrunedOps); } else { - Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); + Result = DAG.getTokenFactor(SDLoc(N), Ops); } } return Result; @@ -1869,7 +2078,8 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { } SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { - assert(ISD::isBinaryOp(BO) && "Unexpected binary operator"); + assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 && + "Unexpected binary operator"); // Don't do this unless the old select is going away. We want to eliminate the // binary operator, not replace a binop with a select.
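The fold in the next hunk rests on binary operations distributing over a select whose arms are constants; once the binop is applied to both arms and constant-folded, only the select remains. A minimal C++ sketch of the scalar identity (the real code operates on SDNodes and also propagates the node's flags):

#include <cassert>
#include <cstdint>

// binop(select(Cond, CT, CF), C) == select(Cond, binop(CT, C), binop(CF, C))
int main() {
  const uint32_t CT = 10, CF = 20, C = 7;
  for (int Cond = 0; Cond <= 1; ++Cond) {
    uint32_t Fused = (Cond ? CT : CF) + C;   // original form: binop of select
    uint32_t NewCT = CT + C, NewCF = CF + C; // constant-folded arms
    assert(Fused == (Cond ? NewCT : NewCF)); // select of the new constants
  }
  return 0;
}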
@@ -1940,7 +2150,9 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { !isConstantFPBuildVectorOrConstantFP(NewCF)) return SDValue(); - return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); + SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); + SelectOp->setFlags(BO->getFlags()); + return SelectOp; } static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) { @@ -1990,6 +2202,7 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { // We need a constant operand for the add/sub, and the other operand is a // logical shift right: add (srl), C or sub C, (srl). + // TODO - support non-uniform vector amounts. bool IsAdd = N->getOpcode() == ISD::ADD; SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0); SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1); @@ -2006,7 +2219,7 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { EVT VT = ShiftOp.getValueType(); SDValue ShAmt = ShiftOp.getOperand(1); ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); - if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1) + if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1)) return SDValue(); // Eliminate the 'not' by adjusting the shift and add/sub constant: @@ -2019,7 +2232,10 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT)); } -SDValue DAGCombiner::visitADD(SDNode *N) { +/// Try to fold a node that behaves like an ADD (note that N isn't necessarily +/// an ISD::ADD here, it could for example be an ISD::OR if we know that there +/// are no common bits set in the operands). +SDValue DAGCombiner::visitADDLike(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); @@ -2058,13 +2274,22 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return N0; if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { + // fold ((A-c1)+c2) -> (A+(c2-c1)) + if (N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) { + SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(), + N0.getOperand(1).getNode()); + assert(Sub && "Constant folding failed"); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub); + } + // fold ((c1-A)+c2) -> (c1+c2)-A if (N0.getOpcode() == ISD::SUB && isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { - // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic. 
- return DAG.getNode(ISD::SUB, DL, VT, - DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), - N0.getOperand(1)); + SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(), + N0.getOperand(0).getNode()); + assert(Add && "Constant folding failed"); + return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); } // add (sext i1 X), 1 -> zext (not i1 X) @@ -2097,9 +2322,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return NewSel; // reassociate add - if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags())) - return RADD; - + if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) { + if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags())) + return RADD; + } // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0))) return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); @@ -2116,6 +2342,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) return N0.getOperand(0); + // fold ((A-B)+(C-A)) -> (C-B) + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && + N0.getOperand(0) == N1.getOperand(1)) + return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), + N0.getOperand(1)); + + // fold ((A-B)+(B-C)) -> (A-C) + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && + N0.getOperand(1) == N1.getOperand(0)) + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), + N1.getOperand(1)); + // fold (A+(B-(A+C))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(0)) @@ -2148,31 +2386,93 @@ SDValue DAGCombiner::visitADD(SDNode *N) { DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); } + // fold (add (umax X, C), -C) --> (usubsat X, C) + if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) { + auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) { + return (!Max && !Op) || + (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue())); + }; + if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT, + /*AllowUndefs*/ true)) + return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), + N0.getOperand(1)); + } + + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + if (isOneOrOneSplat(N1)) { + // fold (add (xor a, -1), 1) -> (sub 0, a) + if (isBitwiseNot(N0)) + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + N0.getOperand(0)); + + // fold (add (add (xor a, -1), b), 1) -> (sub b, a) + if (N0.getOpcode() == ISD::ADD || + N0.getOpcode() == ISD::UADDO || + N0.getOpcode() == ISD::SADDO) { + SDValue A, Xor; + + if (isBitwiseNot(N0.getOperand(0))) { + A = N0.getOperand(1); + Xor = N0.getOperand(0); + } else if (isBitwiseNot(N0.getOperand(1))) { + A = N0.getOperand(0); + Xor = N0.getOperand(1); + } + + if (Xor) + return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0)); + } + + // Look for: + // add (add x, y), 1 + // And if the target does not like this form then turn into: + // sub y, (xor x, -1) + if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() && + N0.getOpcode() == ISD::ADD) { + SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), + DAG.getAllOnesConstant(DL, VT)); + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not); + } + } + + // (x - y) + -1 -> add (xor y, -1), x + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + isAllOnesOrAllOnesSplat(N1)) { + SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::ADD, DL, VT, Xor, 
N0.getOperand(0)); + } + + if (SDValue Combined = visitADDLikeCommutative(N0, N1, N)) + return Combined; + + if (SDValue Combined = visitADDLikeCommutative(N1, N0, N)) + return Combined; + + return SDValue(); +} + +SDValue DAGCombiner::visitADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + SDLoc DL(N); + + if (SDValue Combined = visitADDLike(N)) + return Combined; + if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) return V; if (SDValue V = foldAddSubOfSignBit(N, DAG)) return V; - if (SimplifyDemandedBits(SDValue(N, 0))) - return SDValue(N, 0); - // fold (a+b) -> (a|b) iff a and b share no bits. if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && DAG.haveNoCommonBitsSet(N0, N1)) return DAG.getNode(ISD::OR, DL, VT, N0, N1); - // fold (add (xor a, -1), 1) -> (sub 0, a) - if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) - return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), - N0.getOperand(0)); - - if (SDValue Combined = visitADDLike(N0, N1, N)) - return Combined; - - if (SDValue Combined = visitADDLike(N1, N0, N)) - return Combined; - return SDValue(); } @@ -2246,6 +2546,10 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO) return SDValue(); + EVT VT = V.getNode()->getValueType(0); + if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT)) + return SDValue(); + // If the result is masked, then no matter what kind of bool it is we can // return. If it isn't, then we need to make sure the bool type is either 0 or // 1 and not other values. @@ -2257,7 +2561,26 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { return SDValue(); } -SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) { +/// Given the operands of an add/sub operation, see if the 2nd operand is a +/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert +/// the opcode and bypass the mask operation. +static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, + SelectionDAG &DAG, const SDLoc &DL) { + if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1))) + return SDValue(); + + EVT VT = N0.getValueType(); + if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits()) + return SDValue(); + + // add N0, (and (AssertSext X, i1), 1) --> sub N0, X + // sub N0, (and (AssertSext X, i1), 1) --> add N0, X + return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0)); +} + +/// Helper for doing combines based on N0 and N1 being added to each other. +SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, + SDNode *LocReference) { EVT VT = N0.getValueType(); SDLoc DL(LocReference); @@ -2269,21 +2592,42 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) N1.getOperand(0).getOperand(1), N1.getOperand(1))); - if (N1.getOpcode() == ISD::AND) { - SDValue AndOp0 = N1.getOperand(0); - unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); - unsigned DestBits = VT.getScalarSizeInBits(); - - // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) - // and similar xforms where the inner op is either ~0 or 0. 
- if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1))) - return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0); - } + if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL)) + return V; - // add (sext i1), X -> sub X, (zext i1) + // Look for: + // add (add x, 1), y + // And if the target does not like this form then turn into: + // sub y, (xor x, -1) + if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() && + N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) { + SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), + DAG.getAllOnesConstant(DL, VT)); + return DAG.getNode(ISD::SUB, DL, VT, N1, Not); + } + + // Hoist one-use subtraction by non-opaque constant: + // (x - C) + y -> (x + y) - C + // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); + } + // Hoist one-use subtraction from non-opaque constant: + // (C - x) + y -> (y - x) + C + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0)); + } + + // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1' + // rather than 'add 0/-1' (the zext should get folded). + // add (sext i1 Y), X --> sub X, (zext i1 Y) if (N0.getOpcode() == ISD::SIGN_EXTEND && - N0.getOperand(0).getValueType() == MVT::i1 && - !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { + N0.getOperand(0).getScalarValueSizeInBits() == 1 && + TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) { SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } @@ -2344,8 +2688,10 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { return SDValue(); } -static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT, +static SDValue flipBoolean(SDValue V, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI) { + EVT VT = V.getValueType(); + SDValue Cst; switch (TLI.getBooleanContents(VT)) { case TargetLowering::ZeroOrOneBooleanContent: @@ -2353,35 +2699,60 @@ static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT, Cst = DAG.getConstant(1, DL, VT); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: - Cst = DAG.getConstant(-1, DL, VT); + Cst = DAG.getAllOnesConstant(DL, VT); break; } return DAG.getNode(ISD::XOR, DL, VT, V, Cst); } -static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) { - if (V.getOpcode() != ISD::XOR) return false; - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1)); - if (!Const) return false; +/** + * Flips a boolean if it is cheaper to compute. If the Force parameter is set, + * then the flip also occurs if computing the inverse has the same cost. + * This function returns an empty SDValue in case it cannot flip the boolean + * without increasing the cost of the computation. If you want to flip a boolean + * no matter what, use flipBoolean.
+ */ +static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, + const TargetLowering &TLI, + bool Force) { + if (Force && isa<ConstantSDNode>(V)) + return flipBoolean(V, SDLoc(V), DAG, TLI); + + if (V.getOpcode() != ISD::XOR) + return SDValue(); + + ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false); + if (!Const) + return SDValue(); + EVT VT = V.getValueType(); + + bool IsFlip = false; switch(TLI.getBooleanContents(VT)) { case TargetLowering::ZeroOrOneBooleanContent: - return Const->isOne(); + IsFlip = Const->isOne(); + break; case TargetLowering::ZeroOrNegativeOneBooleanContent: - return Const->isAllOnesValue(); + IsFlip = Const->isAllOnesValue(); + break; case TargetLowering::UndefinedBooleanContent: - return (Const->getAPIntValue() & 0x01) == 1; + IsFlip = (Const->getAPIntValue() & 0x01) == 1; + break; } - llvm_unreachable("Unsupported boolean content"); + + if (IsFlip) + return V.getOperand(0); + if (Force) + return flipBoolean(V, SDLoc(V), DAG, TLI); + return SDValue(); } -SDValue DAGCombiner::visitUADDO(SDNode *N) { +SDValue DAGCombiner::visitADDO(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); - if (VT.isVector()) - return SDValue(); + bool IsSigned = (ISD::SADDO == N->getOpcode()); EVT CarryVT = N->getValueType(1); SDLoc DL(N); @@ -2392,40 +2763,42 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) { DAG.getUNDEF(CarryVT)); // canonicalize constant to RHS. - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - if (N0C && !N1C) - return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0); + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0); - // fold (uaddo x, 0) -> x + no carry out - if (isNullConstant(N1)) + // fold (addo x, 0) -> x + no carry out + if (isNullOrNullSplat(N1)) return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT)); - // If it cannot overflow, transform into an add. - if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) - return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), - DAG.getConstant(0, DL, CarryVT)); + if (!IsSigned) { + // If it cannot overflow, transform into an add. + if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) + return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), + DAG.getConstant(0, DL, CarryVT)); - // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry. - if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) { - SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(), - DAG.getConstant(0, DL, VT), - N0.getOperand(0)); - return CombineTo(N, Sub, - flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI)); - } + // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry. 
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) { + SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(), + DAG.getConstant(0, DL, VT), N0.getOperand(0)); + return CombineTo(N, Sub, + flipBoolean(Sub.getValue(1), DL, DAG, TLI)); + } - if (SDValue Combined = visitUADDOLike(N0, N1, N)) - return Combined; + if (SDValue Combined = visitUADDOLike(N0, N1, N)) + return Combined; - if (SDValue Combined = visitUADDOLike(N1, N0, N)) - return Combined; + if (SDValue Combined = visitUADDOLike(N1, N0, N)) + return Combined; + } return SDValue(); } SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { - auto VT = N0.getValueType(); + EVT VT = N0.getValueType(); + if (VT.isVector()) + return SDValue(); // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) // If Y + 1 cannot overflow. @@ -2484,11 +2857,10 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1); } - EVT CarryVT = CarryIn.getValueType(); - // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry. if (isNullConstant(N0) && isNullConstant(N1)) { EVT VT = N0.getValueType(); + EVT CarryVT = CarryIn.getValueType(); SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT); AddToWorklist(CarryExt.getNode()); return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt, @@ -2496,16 +2868,6 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { DAG.getConstant(0, DL, CarryVT)); } - // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry. - if (isBitwiseNot(N0) && isNullConstant(N1) && - isBooleanFlip(CarryIn, CarryVT, TLI)) { - SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), - DAG.getConstant(0, DL, N0.getValueType()), - N0.getOperand(0), CarryIn.getOperand(0)); - return CombineTo(N, Sub, - flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI)); - } - if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N)) return Combined; @@ -2515,12 +2877,112 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { return SDValue(); } +/** + * If we are facing some sort of diamond carry propagation pattern, try to + * break it up to generate something like: + * (addcarry X, 0, (addcarry A, B, Z):Carry) + * + * The end result is usually an increase in the number of operations required, but because the + * carry is now linearized, other transforms can kick in and optimize the DAG. + * + * Patterns typically look something like + * (uaddo A, B) + * / \ + * Carry Sum + * | \ + * | (addcarry *, 0, Z) + * | / + * \ Carry + * | / + * (addcarry X, *, *) + * + * But numerous variations exist. Our goal is to identify A, B, X and Z and + * produce a combine with a single path for carry propagation. + */ +static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, + SDValue X, SDValue Carry0, SDValue Carry1, + SDNode *N) { + if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1) + return SDValue(); + if (Carry1.getOpcode() != ISD::UADDO) + return SDValue(); + + SDValue Z; + + /** + * First look for a suitable Z. It will present itself in the form of + * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true + */ + if (Carry0.getOpcode() == ISD::ADDCARRY && + isNullConstant(Carry0.getOperand(1))) { + Z = Carry0.getOperand(2); + } else if (Carry0.getOpcode() == ISD::UADDO && + isOneConstant(Carry0.getOperand(1))) { + EVT VT = Combiner.getSetCCResultType(Carry0.getValueType()); + Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT); + } else { + // We couldn't find a suitable Z.
+ return SDValue(); + } + + auto cancelDiamond = [&](SDValue A, SDValue B) { + SDLoc DL(N); + SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z); + Combiner.AddToWorklist(NewY.getNode()); + return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X, + DAG.getConstant(0, DL, X.getValueType()), + NewY.getValue(1)); + }; + + /** + * (uaddo A, B) + * | + * Sum + * | + * (addcarry *, 0, Z) + */ + if (Carry0.getOperand(0) == Carry1.getValue(0)) { + return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1)); + } + + /** + * (addcarry A, 0, Z) + * | + * Sum + * | + * (uaddo *, B) + */ + if (Carry1.getOperand(0) == Carry0.getValue(0)) { + return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1)); + } + + if (Carry1.getOperand(1) == Carry0.getValue(0)) { + return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0)); + } + + return SDValue(); +} + SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { + // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry. + if (isBitwiseNot(N0)) + if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) { + SDLoc DL(N); + SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1, + N0.getOperand(0), NotC); + return CombineTo(N, Sub, + flipBoolean(Sub.getValue(1), DL, DAG, TLI)); + } + // Iff the flag result is dead: // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry) + // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo + // or the dependency between the instructions. if ((N0.getOpcode() == ISD::ADD || - (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) && + (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 && + N0.getValue(1) != CarryIn)) && isNullConstant(N1) && !N->hasAnyUseOfValue(1)) return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0.getOperand(0), N0.getOperand(1), CarryIn); @@ -2529,35 +2991,13 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, * When one of the addcarry arguments is itself a carry, we may be facing * a diamond carry propagation, in which case we try to transform the DAG * to ensure linear carry propagation if that is possible. - * - * We are trying to get: - * (addcarry X, 0, (addcarry A, B, Z):Carry) */ if (auto Y = getAsCarry(TLI, N1)) { - /** - * (uaddo A, B) - * / \ - * Carry Sum - * | \ - * | (addcarry *, 0, Z) - * | / - * \ Carry - * | / - * (addcarry X, *, *) - */ - if (Y.getOpcode() == ISD::UADDO && - CarryIn.getResNo() == 1 && - CarryIn.getOpcode() == ISD::ADDCARRY && - isNullConstant(CarryIn.getOperand(1)) && - CarryIn.getOperand(0) == Y.getValue(0)) { - auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(), - Y.getOperand(0), Y.getOperand(1), - CarryIn.getOperand(2)); - AddToWorklist(NewY.getNode()); - return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, - DAG.getConstant(0, SDLoc(N), N0.getValueType()), - NewY.getValue(1)); - } + // Because both are carries, Y and Z can be swapped.
+ if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N)) + return R; + if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N)) + return R; } return SDValue(); @@ -2620,7 +3060,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // -(X >>s 31) -> (X >>u 31) if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) { ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1)); - if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) { + if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) { auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA; if (!LegalOperations || TLI.isOperationLegal(NewSh, VT)) return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1)); @@ -2662,16 +3102,48 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); + // fold (A+C1)-C2 -> A+(C1-C2) + if (N0.getOpcode() == ISD::ADD && + isConstantOrConstantVector(N1, /* NoOpaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { + SDValue NewC = DAG.FoldConstantArithmetic( + ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode()); + assert(NewC && "Constant folding failed"); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC); + } + // fold C2-(A+C1) -> (C2-C1)-A if (N1.getOpcode() == ISD::ADD) { SDValue N11 = N1.getOperand(1); if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && isConstantOrConstantVector(N11, /* NoOpaques */ true)) { - SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11); + SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), + N11.getNode()); + assert(NewC && "Constant folding failed"); return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); } } + // fold (A-C1)-C2 -> A-(C1+C2) + if (N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N1, /* NoOpaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { + SDValue NewC = DAG.FoldConstantArithmetic( + ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode()); + assert(NewC && "Constant folding failed"); + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC); + } + + // fold (c1-A)-c2 -> (c1-c2)-A + if (N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N1, /* NoOpaques */ true) && + isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) { + SDValue NewC = DAG.FoldConstantArithmetic( + ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode()); + assert(NewC && "Constant folding failed"); + return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1)); + } + // fold ((A+(B+or-C))-B) -> A+or-C if (N0.getOpcode() == ISD::ADD && (N0.getOperand(1).getOpcode() == ISD::SUB || @@ -2728,6 +3200,63 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (SDValue V = foldAddSubOfSignBit(N, DAG)) return V; + if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N))) + return V; + + // (x - y) - 1 -> add (xor y, -1), x + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) { + SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), + DAG.getAllOnesConstant(DL, VT)); + return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); + } + + // Look for: + // sub y, (xor x, -1) + // And if the target does not like this form then turn into: + // add (add x, y), 1 + if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) { + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0)); + return DAG.getNode(ISD::ADD, DL, VT, Add, 
DAG.getConstant(1, DL, VT)); + } + + // Hoist one-use addition by non-opaque constant: + // (x + C) - y -> (x - y) + C + if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD && + isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1)); + } + // y - (x + C) -> (y - x) - C + if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD && + isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1)); + } + // (x - C) - y -> (x - y) - C + // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1)); + } + // (C - x) - y -> C - (x + y) + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add); + } + + // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1' + // rather than 'sub 0/1' (the sext should get folded). + // sub X, (zext i1 Y) --> add X, (sext i1 Y) + if (N1.getOpcode() == ISD::ZERO_EXTEND && + N1.getOperand(0).getScalarValueSizeInBits() == 1 && + TLI.getBooleanContents(VT) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0)); + return DAG.getNode(ISD::ADD, DL, VT, N0, SExt); + } + // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X) if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) { @@ -2772,7 +3301,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) { SDValue ShAmt = N1.getOperand(1); ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); - if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) { + if (ShAmtC && + ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) { SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt); return DAG.getNode(ISD::ADD, DL, VT, N0, SRA); } @@ -2846,12 +3376,11 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitUSUBO(SDNode *N) { +SDValue DAGCombiner::visitSUBO(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); - if (VT.isVector()) - return SDValue(); + bool IsSigned = (ISD::SSUBO == N->getOpcode()); EVT CarryVT = N->getValueType(1); SDLoc DL(N); @@ -2861,17 +3390,25 @@ SDValue DAGCombiner::visitUSUBO(SDNode *N) { return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), DAG.getUNDEF(CarryVT)); - // fold (usubo x, x) -> 0 + no borrow + // fold (subo x, x) -> 0 + no borrow if (N0 == N1) return CombineTo(N, DAG.getConstant(0, DL, VT), DAG.getConstant(0, DL, CarryVT)); - // fold (usubo x, 0) -> x + no borrow - if (isNullConstant(N1)) + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + + // fold (subo x, c) -> (addo x, -c) + if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) { + return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, +
DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); + } + + // fold (subo x, 0) -> x + no borrow + if (isNullOrNullSplat(N1)) return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT)); // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow - if (isAllOnesConstant(N0)) + if (!IsSigned && isAllOnesOrAllOnesSplat(N0)) return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), DAG.getConstant(0, DL, CarryVT)); @@ -3012,13 +3549,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { MathOp = ISD::SUB; if (MathOp != ISD::DELETED_NODE) { - unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2() - : (MulC + 1).logBase2(); - assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() && - "Not expecting multiply-by-constant that could have simplified"); + unsigned ShAmt = + MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2(); + assert(ShAmt < VT.getScalarSizeInBits() && + "multiply-by-constant generated out of bounds shift"); SDLoc DL(N); - SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0, - DAG.getConstant(ShAmt, DL, VT)); + SDValue Shl = + DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT)); SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0); if (ConstValue1.isNegative()) R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R); @@ -3069,7 +3606,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getOperand(1), N1)); // reassociate mul - if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags())) + if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags())) return RMUL; return SDValue(); @@ -3612,7 +4149,6 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c) if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) { - SDLoc DL(N); unsigned NumEltBits = VT.getScalarSizeInBits(); SDValue LogBase2 = BuildLogBase2(N1, DL); SDValue SRLAmt = DAG.getNode( @@ -3753,22 +4289,14 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitSMULO(SDNode *N) { - // (smulo x, 2) -> (saddo x, x) - if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) - if (C2->getAPIntValue() == 2) - return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), - N->getOperand(0), N->getOperand(0)); - - return SDValue(); -} +SDValue DAGCombiner::visitMULO(SDNode *N) { + bool IsSigned = (ISD::SMULO == N->getOpcode()); -SDValue DAGCombiner::visitUMULO(SDNode *N) { - // (umulo x, 2) -> (uaddo x, x) - if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) + // (mulo x, 2) -> (addo x, x) + if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1))) if (C2->getAPIntValue() == 2) - return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), - N->getOperand(0), N->getOperand(0)); + return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N), + N->getVTList(), N->getOperand(0), N->getOperand(0)); return SDValue(); } @@ -4075,6 +4603,33 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, SDValue Zero = DAG.getConstant(0, DL, OpVT); return DAG.getSetCC(DL, VT, Or, Zero, CC1); } + + // Turn compare of constants whose difference is 1 bit into add+and+setcc. + // TODO - support non-uniform vector amounts. + if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) { + // Match a shared variable operand and 2 non-opaque constant operands. 
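The one-bit-difference compare combine matched next exploits a small bit trick: when C0 - C1 is a single bit, X is one of {C0, C1} exactly when (X - C1) has no bits outside that single bit. A brute-force C++ check of the scalar identity (constants chosen arbitrarily for illustration):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C1 = 0x40, C0 = 0x48; // C0 > C1 and C0 - C1 == 8, one bit set
  const uint8_t NotDiff = static_cast<uint8_t>(~(C0 - C1));
  for (unsigned v = 0; v <= 0xFF; ++v) {
    uint8_t X = static_cast<uint8_t>(v);
    bool NeBoth = (X != C0) && (X != C1);
    // and (setcc X, C0, ne), (setcc X, C1, ne) -->
    // setcc ((add X, -C1), ~(C0 - C1)), 0, ne
    uint8_t Biased = static_cast<uint8_t>(X - C1); // in {0, 8} iff X in {C1, C0}
    assert(NeBoth == ((Biased & NotDiff) != 0));
  }
  return 0;
}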
+ ConstantSDNode *C0 = isConstOrConstSplat(LR); + ConstantSDNode *C1 = isConstOrConstSplat(RR); + if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) { + // Canonicalize larger constant as C0. + if (C1->getAPIntValue().ugt(C0->getAPIntValue())) + std::swap(C0, C1); + + // The difference of the constants must be a single bit. + const APInt &C0Val = C0->getAPIntValue(); + const APInt &C1Val = C1->getAPIntValue(); + if ((C0Val - C1Val).isPowerOf2()) { + // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) --> + // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq + SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT); + SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC); + SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT); + SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC); + SDValue Zero = DAG.getConstant(0, DL, OpVT); + return DAG.getSetCC(DL, VT, And, Zero, CC0); + } + } + } } // Canonicalize equivalent operands to LL == RL. @@ -4259,7 +4814,8 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, // Ensure that this isn't going to produce an unsupported unaligned access. if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, - LDST->getAddressSpace(), ShAmt / 8)) + LDST->getAddressSpace(), ShAmt / 8, + LDST->getMemOperand()->getFlags())) return false; // It's not possible to generate a constant of extended or untyped type. @@ -4316,9 +4872,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, SDNode *&NodeToMask) { // Recursively search for the operands, looking for loads which can be // narrowed. - for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) { - SDValue Op = N->getOperand(i); - + for (SDValue Op : N->op_values()) { if (Op.getValueType().isVector()) return false; @@ -4480,7 +5034,7 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) { SDValue N1 = N->getOperand(1); // Do we actually prefer shifts over mask? - if (!TLI.preferShiftsToClearExtremeBits(N0)) + if (!TLI.shouldFoldMaskToVariableShiftPair(N0)) return SDValue(); // Try to match (-1 '[outer] logical shift' y) @@ -4575,7 +5129,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return NewSel; // reassociate and - if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags())) + if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags())) return RAND; // Try to convert a constant mask AND into a shuffle clear mask. @@ -4644,24 +5198,22 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // the first vector value and FF for the rest, repeating. We need a mask // that will apply equally to all members of the vector, so AND all the // lanes of the constant together. - EVT VT = Vector->getValueType(0); - unsigned BitWidth = VT.getScalarSizeInBits(); + unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits(); // If the splat value has been compressed to a bitlength lower // than the size of the vector lane, we need to re-expand it to // the lane size. - if (BitWidth > SplatBitSize) - for (SplatValue = SplatValue.zextOrTrunc(BitWidth); - SplatBitSize < BitWidth; - SplatBitSize = SplatBitSize * 2) + if (EltBitWidth > SplatBitSize) + for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth); + SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2) SplatValue |= SplatValue.shl(SplatBitSize); // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. 
- if (SplatBitSize % BitWidth == 0) { - Constant = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) - Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + if ((SplatBitSize % EltBitWidth) == 0) { + Constant = APInt::getAllOnesValue(EltBitWidth); + for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i) + Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth); } } } @@ -4773,44 +5325,29 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // fold (zext_inreg (extload x)) -> (zextload x) - if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getScalarValueSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use - if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - N0.hasOneUse()) { + if (ISD::isUNINDEXEDLoad(N0.getNode()) && + (ISD::isEXTLoad(N0.getNode()) || + (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getScalarValueSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarSizeInBits())) && + unsigned ExtBitSize = N1.getScalarValueSizeInBits(); + unsigned MemBitSize = MemVT.getScalarSizeInBits(); + APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize); + if (DAG.MaskedValueIsZero(N1, ExtBits) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); + SDValue ExtLoad = + DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), + LN0->getBasePtr(), MemVT, LN0->getMemOperand()); AddToWorklist(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } + // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), @@ -5155,6 +5692,23 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { return SDValue(); } +/// OR combines for which the commuted variant will be tried as well. 
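The two folds in visitORCommutative (defined next) are instances of the absorption-style identity (X & ~Y) | Y == X | Y: whatever the AND masks out of X is reinstated by the OR with Y. A brute-force C++ check over all byte pairs:

#include <cassert>
#include <cstdint>

// fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
int main() {
  for (unsigned x = 0; x <= 0xFF; ++x)
    for (unsigned y = 0; y <= 0xFF; ++y) {
      uint8_t X = static_cast<uint8_t>(x), Y = static_cast<uint8_t>(y);
      assert(static_cast<uint8_t>((X & static_cast<uint8_t>(~Y)) | Y) ==
             static_cast<uint8_t>(X | Y));
    }
  return 0;
}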
+static SDValue visitORCommutative( + SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { + EVT VT = N0.getValueType(); + if (N0.getOpcode() == ISD::AND) { + // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y) + if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1); + + // fold (or (and (xor Y, -1), X), Y) -> (or X, Y) + if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1); + } + + return SDValue(); +} + SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5284,7 +5838,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return BSwap; // reassociate or - if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags())) + if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags())) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) @@ -5302,6 +5856,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } + if (SDValue Combined = visitORCommutative(DAG, N0, N1, N)) + return Combined; + if (SDValue Combined = visitORCommutative(DAG, N1, N0, N)) + return Combined; + // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) return V; @@ -5318,6 +5877,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + // If OR can be rewritten into ADD, try combines based on ADD. + if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && + DAG.haveNoCommonBitsSet(N0, N1)) + if (SDValue Combined = visitADDLike(N)) + return Combined; + return SDValue(); } @@ -5869,6 +6434,213 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, return None; } +static unsigned LittleEndianByteAt(unsigned BW, unsigned i) { + return i; +} + +static unsigned BigEndianByteAt(unsigned BW, unsigned i) { + return BW - i - 1; +} + +// Check if the byte offsets we are looking at match with either a big or +// little endian value load. Return true for big endian, false for little +// endian, and None if the match failed. +static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets, + int64_t FirstOffset) { + // The endianness can be decided only when there are at least 2 bytes. + unsigned Width = ByteOffsets.size(); + if (Width < 2) + return None; + + bool BigEndian = true, LittleEndian = true; + for (unsigned i = 0; i < Width; i++) { + int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset; + LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i); + BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i); + if (!BigEndian && !LittleEndian) + return None; + } + + assert((BigEndian != LittleEndian) && "It should be either big endian or " + "little endian"); + return BigEndian; +} + +static SDValue stripTruncAndExt(SDValue Value) { + switch (Value.getOpcode()) { + case ISD::TRUNCATE: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ANY_EXTEND: + return stripTruncAndExt(Value.getOperand(0)); + } + return Value; +} + +/// Match a pattern where a wide type scalar value is stored by several narrow +/// stores. Fold it into a single store or a BSWAP and a store if the target +/// supports it. +/// +/// Assuming little endian target: +/// i8 *p = ... +/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF; +/// p[1] = (val >> 8) & 0xFF; +/// p[2] = (val >> 16) & 0xFF; +/// p[3] = (val >> 24) & 0xFF; +/// => +/// *((i32)p) = val; +/// +/// i8 *p = ... +/// i32 val = ... +/// p[0] = (val >> 24) & 0xFF; +/// p[1] = (val >> 16) & 0xFF; +/// p[2] = (val >> 8) & 0xFF; +/// p[3] = (val >> 0) & 0xFF; +/// => +/// *((i32)p) = BSWAP(val); +SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { + // Collect all the stores in the chain. + SDValue Chain; + SmallVector<StoreSDNode *, 8> Stores; + for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) { + if (Store->getMemoryVT() != MVT::i8 || + Store->isVolatile() || Store->isIndexed()) + return SDValue(); + Stores.push_back(Store); + Chain = Store->getChain(); + } + // Handle only simple integer types. + unsigned Width = Stores.size(); + EVT VT = EVT::getIntegerVT( + *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits()); + if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT)) + return SDValue(); + + // Check if all the bytes of the combined value we are looking at are stored + // to the same base address. Collect byte offsets from Base address into + // ByteOffsets. + SDValue CombinedValue; + SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX); + int64_t FirstOffset = INT64_MAX; + StoreSDNode *FirstStore = nullptr; + Optional<BaseIndexOffset> Base; + for (auto Store : Stores) { + // All the stores store a different byte of the CombinedValue. A truncate is + // required to get that byte value. + SDValue Trunc = Store->getValue(); + if (Trunc.getOpcode() != ISD::TRUNCATE) + return SDValue(); + // A shift operation is required to get the right byte offset, except the + // first byte. + int64_t Offset = 0; + SDValue Value = Trunc.getOperand(0); + if (Value.getOpcode() == ISD::SRL || + Value.getOpcode() == ISD::SRA) { + ConstantSDNode *ShiftOffset = + dyn_cast<ConstantSDNode>(Value.getOperand(1)); + // Trying to match the following pattern. The shift offset must be + // a constant and a multiple of 8. It is the byte offset in "y". + // + // x = srl y, offset + // i8 z = trunc x + // store z, ... + if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8)) + return SDValue(); + + Offset = ShiftOffset->getSExtValue()/8; + Value = Value.getOperand(0); + } + + // Stores must share the same combined value with different offsets. + if (!CombinedValue) + CombinedValue = Value; + else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value)) + return SDValue(); + + // The trunc and all the extend operations should be stripped to get the + // real value being stored. + else if (CombinedValue.getValueType() != VT) { + if (Value.getValueType() == VT || + Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits()) + CombinedValue = Value; + // Give up if the combined value type is smaller than the store size.
+ if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits()) + return SDValue(); + } + + // Stores must share the same base address + BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG); + int64_t ByteOffsetFromBase = 0; + if (!Base) + Base = Ptr; + else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) + return SDValue(); + + // Remember the first byte store + if (ByteOffsetFromBase < FirstOffset) { + FirstStore = Store; + FirstOffset = ByteOffsetFromBase; + } + // Map the offset in the store and the offset in the combined value, and + // early return if it has been set before. + if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX) + return SDValue(); + ByteOffsets[Offset] = ByteOffsetFromBase; + } + + assert(FirstOffset != INT64_MAX && "First byte offset must be set"); + assert(FirstStore && "First store must be set"); + + // Check if the bytes of the combined value we are looking at match with + // either a big or little endian value store. + Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset); + if (!IsBigEndian.hasValue()) + return SDValue(); + + // The node we are looking at matches the pattern; check if we can + // replace it with a single bswap if needed and a store. + + // If the store needs a byte swap, check if the target supports it + bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian; + + // Before legalize we can introduce illegal bswaps which will be later + // converted to an explicit bswap sequence. This way we end up with a single + // store and byte shuffling instead of several stores and byte shuffling. + if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + // Check that a store of the wide type is both allowed and fast on the target + bool Fast = false; + bool Allowed = + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *FirstStore->getMemOperand(), &Fast); + if (!Allowed || !Fast) + return SDValue(); + + if (VT != CombinedValue.getValueType()) { + assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() && + "Unexpected store value to combine"); + CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, + CombinedValue); + } + + if (NeedsBswap) + CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue); + + SDValue NewStore = + DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(), + FirstStore->getPointerInfo(), FirstStore->getAlignment()); + + // Rely on other DAG combine rules to remove the other individual stores. + DAG.ReplaceAllUsesWith(N, NewStore.getNode()); + return NewStore; +} + /// Match a pattern where a wide type scalar value is loaded by several narrow /// loads and combined by shifts and ors. Fold it into a single load or a load /// and a BSWAP if the target supports it.
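The store combine above and the load combine below are mirror images of the same byte-layout reasoning: per-byte shifts reproduce a value in little endian byte order, and the reversed order differs from it by exactly one byte swap. A small endian-neutral C++ sketch (__builtin_bswap32 is a GCC/Clang builtin; other compilers need an equivalent):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Val = 0x11223344u;
  uint8_t P[4];
  for (int i = 0; i < 4; ++i)                       // the narrow-store side:
    P[i] = static_cast<uint8_t>(Val >> (8 * i));    // p[i] = (val >> 8*i) & 0xFF
  // Reassembling in the same order gives back Val (little endian layout)...
  uint32_t LE = static_cast<uint32_t>(P[0]) |
                (static_cast<uint32_t>(P[1]) << 8) |
                (static_cast<uint32_t>(P[2]) << 16) |
                (static_cast<uint32_t>(P[3]) << 24);
  assert(LE == Val);
  // ...while the reversed (big endian) order is exactly BSWAP(Val).
  uint32_t BE = (static_cast<uint32_t>(P[0]) << 24) |
                (static_cast<uint32_t>(P[1]) << 16) |
                (static_cast<uint32_t>(P[2]) << 8) |
                static_cast<uint32_t>(P[3]);
  assert(BE == __builtin_bswap32(Val));
  return 0;
}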
@@ -5916,11 +6688,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT)) return SDValue(); - std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = []( - unsigned BW, unsigned i) { return i; }; - std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = []( - unsigned BW, unsigned i) { return BW - i - 1; }; - bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian(); auto MemoryByteOffset = [&] (ByteProvider P) { assert(P.isMemory() && "Must be a memory byte provider"); @@ -5987,15 +6754,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Check if the bytes of the OR we are looking at match with either big or // little endian value load - bool BigEndian = true, LittleEndian = true; - for (unsigned i = 0; i < ByteWidth; i++) { - int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset; - LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i); - BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i); - if (!BigEndian && !LittleEndian) - return SDValue(); - } - assert((BigEndian != LittleEndian) && "should be either or"); + Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset); + if (!IsBigEndian.hasValue()) + return SDValue(); + assert(FirstByteProvider && "must be set"); // Ensure that the first byte is loaded from zero offset of the first load. @@ -6008,7 +6770,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // replace it with a single load and bswap if needed. // If the load needs byte swap check if the target supports it - bool NeedsBswap = IsBigEndianTarget != BigEndian; + bool NeedsBswap = IsBigEndianTarget != *IsBigEndian; // Before legalize we can introduce illegal bswaps which will be later // converted to an explicit bswap sequence. This way we end up with a single @@ -6019,8 +6781,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Check that a load of the wide type is both allowed and fast on the target bool Fast = false; bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), - VT, FirstLoad->getAddressSpace(), - FirstLoad->getAlignment(), &Fast); + VT, *FirstLoad->getMemOperand(), &Fast); if (!Allowed || !Fast) return SDValue(); @@ -6160,7 +6921,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return NewSel; // reassociate xor - if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) + if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) return RXOR; // fold !(x cc y) -> (x !cc y) @@ -6218,6 +6979,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); } } + + // fold (not (neg x)) -> (add X, -1) + // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if + // Y is a constant or the subtract has a single use. + if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB && + isNullConstant(N0.getOperand(0))) { + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), + DAG.getAllOnesConstant(DL, VT)); + } + // fold (xor (and x, y), y) -> (and (not x), y) if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) { SDValue X = N0.getOperand(0); @@ -6310,11 +7081,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { /// Handle transforms common to the three shifts, when the shift amount is a /// constant. 
+/// We are looking for: (shift being one of shl/sra/srl)
+///   shift (binop X, C0), C1
+/// And want to transform into:
+///   binop (shift X, C1), (shift C0, C1)
 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   // Do not turn a 'not' into a regular xor.
   if (isBitwiseNot(N->getOperand(0)))
     return SDValue();
 
+  // The inner binop must be one-use, since we want to replace it.
   SDNode *LHS = N->getOperand(0).getNode();
   if (!LHS->hasOneUse()) return SDValue();
@@ -6322,56 +7098,43 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
   // thing happens with address calculations, so it's important to canonicalize
   // it.
-  bool HighBitSet = false;  // Can we transform this if the high bit is set?
-
   switch (LHS->getOpcode()) {
-  default: return SDValue();
+  default:
+    return SDValue();
   case ISD::OR:
   case ISD::XOR:
-    HighBitSet = false; // We can only transform sra if the high bit is clear.
-    break;
   case ISD::AND:
-    HighBitSet = true;  // We can only transform sra if the high bit is set.
     break;
   case ISD::ADD:
     if (N->getOpcode() != ISD::SHL)
       return SDValue(); // only shl(add) not sr[al](add).
-    HighBitSet = false; // We can only transform sra if the high bit is clear.
     break;
   }
 
   // We require the RHS of the binop to be a constant and not opaque as well.
   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
-  if (!BinOpCst) return SDValue();
+  if (!BinOpCst)
+    return SDValue();
 
   // FIXME: disable this unless the input to the binop is a shift by a constant
-  // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
-  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
-  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
-                 BinOpLHSVal->getOpcode() == ISD::SRA ||
-                 BinOpLHSVal->getOpcode() == ISD::SRL;
-  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
-                        BinOpLHSVal->getOpcode() == ISD::SELECT;
-
-  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
-      !isCopyOrSelect)
+  // or is copy/select. Enable this in other cases once we figure out when it
+  // is exactly profitable.
+  SDValue BinOpLHSVal = LHS->getOperand(0);
+  bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
+                            BinOpLHSVal.getOpcode() == ISD::SRA ||
+                            BinOpLHSVal.getOpcode() == ISD::SRL) &&
+                           isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
+  bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
+                        BinOpLHSVal.getOpcode() == ISD::SELECT;
+
+  if (!IsShiftByConstant && !IsCopyOrSelect)
     return SDValue();
 
-  if (isCopyOrSelect && N->hasOneUse())
+  if (IsCopyOrSelect && N->hasOneUse())
    return SDValue();
 
   EVT VT = N->getValueType(0);
 
-  // If this is a signed shift right, and the high bit is modified by the
-  // logical operation, do not perform the transformation. The highBitSet
-  // boolean indicates the value of the high bit of the constant which would
-  // cause it to be modified for this operation.
- if (N->getOpcode() == ISD::SRA) { - bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); - if (BinOpRHSSignSet != HighBitSet) - return SDValue(); - } - if (!TLI.isDesirableToCommuteWithShift(N, Level)) return SDValue(); @@ -6395,11 +7158,12 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { assert(N->getOperand(0).getOpcode() == ISD::AND); // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) - if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { + EVT TruncVT = N->getValueType(0); + if (N->hasOneUse() && N->getOperand(0).hasOneUse() && + TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) { SDValue N01 = N->getOperand(0).getOperand(1); if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) { SDLoc DL(N); - EVT TruncVT = N->getValueType(0); SDValue N00 = N->getOperand(0).getOperand(0); SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00); SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01); @@ -6431,6 +7195,7 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { } // fold (rot x, c) -> (rot x, c % BitSize) + // TODO - support non-uniform vector amounts. if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) { if (Cst->getAPIntValue().uge(Bitsize)) { uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize); @@ -6476,6 +7241,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return V; EVT VT = N0.getValueType(); + EVT ShiftVT = N1.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops @@ -6506,6 +7272,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (shl c1, c2) -> c1<<c2 + // TODO - support non-uniform vector shift amounts. ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); @@ -6517,6 +7284,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) return DAG.getConstant(0, SDLoc(N), VT); + // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -6524,6 +7292,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); } + // TODO - support non-uniform vector shift amounts. if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -6548,69 +7317,86 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { SDLoc DL(N); - EVT ShiftVT = N1.getValueType(); SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum); } } - // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) + // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2)) // For this to be valid, the second form must not preserve any of the bits // that are shifted out by the inner shift in the first form. This means // the outer shift size must be >= the number of bits added by the ext. // As a corollary, we don't care what kind of ext it is. 
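// A concrete instance (illustrative, not from the patch): zero-extending an
// i8 inner shift to i32 adds 24 bits, so the fold only fires when the outer
// shift amount is at least 24.
#include <cstdint>
uint32_t extShlShl(uint8_t x) {
  uint8_t inner = uint8_t(x << 2); // (shl x, 2) in i8
  // 25 >= the 24 bits added by the zext, and 2 + 25 < 32, so this becomes
  // (shl (zext x), 27); with an outer amount of 30 it would fold to 0.
  return uint32_t(inner) << 25;
}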
- if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || - N0.getOpcode() == ISD::ANY_EXTEND || - N0.getOpcode() == ISD::SIGN_EXTEND) && + if ((N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) && N0.getOperand(0).getOpcode() == ISD::SHL) { SDValue N0Op0 = N0.getOperand(0); - if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { - APInt c1 = N0Op0C1->getAPIntValue(); - APInt c2 = N1C->getAPIntValue(); - zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + SDValue InnerShiftAmt = N0Op0.getOperand(1); + EVT InnerVT = N0Op0.getValueType(); + uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits(); - EVT InnerShiftVT = N0Op0.getValueType(); - uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); - if (c2.uge(OpSizeInBits - InnerShiftSize)) { - SDLoc DL(N0); - APInt Sum = c1 + c2; - if (Sum.uge(OpSizeInBits)) - return DAG.getConstant(0, DL, VT); + auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return c2.uge(OpSizeInBits - InnerBitwidth) && + (c1 + c2).uge(OpSizeInBits); + }; + if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) + return DAG.getConstant(0, SDLoc(N), VT); - return DAG.getNode( - ISD::SHL, DL, VT, - DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)), - DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); - } + auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return c2.uge(OpSizeInBits - InnerBitwidth) && + (c1 + c2).ult(OpSizeInBits); + }; + if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDLoc DL(N); + SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0)); + SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT); + Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1); + return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum); } } // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) // Only fold this if the inner zext has no other uses to avoid increasing // the total number of instructions. 
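// Source-level sketch (illustrative) of the zext-of-srl case above, where
// the outer shl amount equals the inner srl amount:
#include <cstdint>
uint32_t zextSrlShl(uint16_t x) {
  uint16_t s = uint16_t(x >> 3); // (srl x, 3) in i16
  // Equivalent to zext((x >> 3) << 3): no bits are lost by doing the shl in
  // the narrow type first, which is what the fold exploits.
  return uint32_t(s) << 3;
}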
- if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && + if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::SRL) { SDValue N0Op0 = N0.getOperand(0); - if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { - if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) { - uint64_t c1 = N0Op0C1->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - if (c1 == c2) { - SDValue NewOp0 = N0.getOperand(0); - EVT CountVT = NewOp0.getOperand(1).getValueType(); - SDLoc DL(N); - SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(), - NewOp0, - DAG.getConstant(c2, DL, CountVT)); - AddToWorklist(NewSHL.getNode()); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); - } - } + SDValue InnerShiftAmt = N0Op0.getOperand(1); + + auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2); + return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2); + }; + if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDLoc DL(N); + EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType(); + SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT); + NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL); + AddToWorklist(NewSHL.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); } } // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 + // TODO - support non-uniform vector shift amounts. if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && N0->getFlags().hasExact()) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { @@ -6619,9 +7405,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDLoc DL(N); if (C1 <= C2) return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), - DAG.getConstant(C2 - C1, DL, N1.getValueType())); + DAG.getConstant(C2 - C1, DL, ShiftVT)); return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), - DAG.getConstant(C1 - C2, DL, N1.getValueType())); + DAG.getConstant(C1 - C2, DL, ShiftVT)); } } @@ -6629,11 +7415,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding // this will increase the total number of instructions. + // TODO - drop hasOneUse requirement if c1 == c2? + // TODO - support non-uniform vector shift amounts. 
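// Worked example (illustrative) of the masked form this fold produces, with
// c1 = 4 (srl) and c2 = 8 (shl) on i32:
#include <cstdint>
uint32_t srlThenShl(uint32_t x) {
  // c2 > c1, so this becomes (x << 4) & 0xFFFFFF00: shift by the difference
  // and mask off the bits the srl/shl pair cleared.
  return (x >> 4) << 8;
}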
if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && - TLI.shouldFoldShiftPairToMask(N, Level)) { + TLI.shouldFoldConstantShiftPairToMask(N, Level)) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - uint64_t c1 = N0C1->getZExtValue(); - if (c1 < OpSizeInBits) { + if (N0C1->getAPIntValue().ult(OpSizeInBits)) { + uint64_t c1 = N0C1->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); SDValue Shift; @@ -6641,12 +7429,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { Mask <<= c2 - c1; SDLoc DL(N); Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), - DAG.getConstant(c2 - c1, DL, N1.getValueType())); + DAG.getConstant(c2 - c1, DL, ShiftVT)); } else { Mask.lshrInPlace(c1 - c2); SDLoc DL(N); Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), - DAG.getConstant(c1 - c2, DL, N1.getValueType())); + DAG.getConstant(c1 - c2, DL, ShiftVT)); } SDLoc DL(N0); return DAG.getNode(ISD::AND, DL, VT, Shift, @@ -6719,6 +7507,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (sra c1, c2) -> (sra c1, c2) + // TODO - support non-uniform vector shift amounts. ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); @@ -6815,32 +7604,32 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); } + // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2)) // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) // if c1 is equal to the number of bits the trunc removes + // TODO - support non-uniform vector shift amounts. if (N0.getOpcode() == ISD::TRUNCATE && (N0.getOperand(0).getOpcode() == ISD::SRL || N0.getOperand(0).getOpcode() == ISD::SRA) && N0.getOperand(0).hasOneUse() && - N0.getOperand(0).getOperand(1).hasOneUse() && - N1C) { + N0.getOperand(0).getOperand(1).hasOneUse() && N1C) { SDValue N0Op0 = N0.getOperand(0); if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) { - unsigned LargeShiftVal = LargeShift->getZExtValue(); EVT LargeVT = N0Op0.getValueType(); - - if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { + unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits; + if (LargeShift->getAPIntValue() == TruncBits) { SDLoc DL(N); - SDValue Amt = - DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL, - getShiftAmountTy(N0Op0.getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, - N0Op0.getOperand(0), Amt); + SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL, + getShiftAmountTy(LargeVT)); + SDValue SRA = + DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt); return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); } } } // Simplify, based on bits shifted out of the LHS. + // TODO - support non-uniform vector shift amounts. if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -6872,6 +7661,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (srl c1, c2) -> c1 >>u c2 + // TODO - support non-uniform vector shift amounts. 
   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   if (N0C && N1C && !N1C->isOpaque())
     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
@@ -6912,6 +7702,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   }
 
   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+  // TODO - support non-uniform vector shift amounts.
   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
       N0.getOperand(0).getOpcode() == ISD::SRL) {
     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
@@ -6935,6 +7726,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   }
 
   // fold (srl (shl x, c), c) -> (and x, cst2)
+  // TODO - (srl (shl x, c1), c2).
   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
     SDLoc DL(N);
@@ -6945,11 +7737,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   }
 
   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
+  // TODO - support non-uniform vector shift amounts.
   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
     // Shifting in all undef bits?
     EVT SmallVT = N0.getOperand(0).getValueType();
     unsigned BitSize = SmallVT.getScalarSizeInBits();
-    if (N1C->getZExtValue() >= BitSize)
+    if (N1C->getAPIntValue().uge(BitSize))
       return DAG.getUNDEF(VT);
 
     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
@@ -6970,7 +7763,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
 
   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
   // bit, which is unmodified by sra.
-  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
+  if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
     if (N0.getOpcode() == ISD::SRA)
       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
   }
@@ -7021,6 +7814,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
 
   // fold operands of srl based on knowledge that the low bits are not
   // demanded.
+  // TODO - support non-uniform vector shift amounts.
   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
@@ -7079,13 +7873,49 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
       N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
     return IsFSHL ? N0 : N1;
 
-  // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+  auto IsUndefOrZero = [](SDValue V) {
+    return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
+  };
+
+  // TODO - support non-uniform vector shift amounts.
   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+    EVT ShAmtTy = N2.getValueType();
+
+    // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
     if (Cst->getAPIntValue().uge(BitWidth)) {
       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
-                         DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
+                         DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
     }
+
+    unsigned ShAmt = Cst->getZExtValue();
+    if (ShAmt == 0)
+      return IsFSHL ? N0 : N1;
+
+    // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
+    // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
+    // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
+    // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
+    if (IsUndefOrZero(N0))
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
+                         DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
+                                         SDLoc(N), ShAmtTy));
+    if (IsUndefOrZero(N1))
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+                         DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
+                                         SDLoc(N), ShAmtTy));
+  }
+
+  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
+  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
+  // iff we know the shift amount is in range.
+ // TODO: when is it worth doing SUB(BW, N2) as well? + if (isPowerOf2_32(BitWidth)) { + APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1); + if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits)) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2); + if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits)) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2); } // fold (fshl N0, N0, N2) -> (rotl N0, N2) @@ -7096,6 +7926,10 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { if (N0 == N1 && hasOperation(RotOpc, VT)) return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2); + // Simplify, based on bits shifted out of N0/N1. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -7207,11 +8041,14 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { // FIXME: This should be checking for no signed zeros on individual operands, as // well as no nans. -static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) { +static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, + SDValue RHS, + const TargetLowering &TLI) { const TargetOptions &Options = DAG.getTarget().Options; EVT VT = LHS.getValueType(); return Options.NoSignedZerosFPMath && VT.isFloatingPoint() && + TLI.isProfitableToCombineMinNumMaxNum(VT) && DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS); } @@ -7364,6 +8201,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { EVT VT = N->getValueType(0); EVT VT0 = N0.getValueType(); SDLoc DL(N); + SDNodeFlags Flags = N->getFlags(); if (SDValue V = DAG.simplifySelect(N0, N1, N2)) return V; @@ -7414,20 +8252,26 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); SDValue InnerSelect = - DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2); + DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags); if (normalizeToSequence || !InnerSelect.use_empty()) return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, - InnerSelect, N2); + InnerSelect, N2, Flags); + // Cleanup on failure. + if (InnerSelect.use_empty()) + recursivelyDeleteUnusedNodes(InnerSelect.getNode()); } // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = - DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(), + Cond1, N1, N2, Flags); if (normalizeToSequence || !InnerSelect.use_empty()) return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1, - InnerSelect); + InnerSelect, Flags); + // Cleanup on failure. + if (InnerSelect.use_empty()) + recursivelyDeleteUnusedNodes(InnerSelect.getNode()); } // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y @@ -7439,12 +8283,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // Create the actual and node if we can generate good code for it. if (!normalizeToSequence) { SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0); - return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, + N2, Flags); } // Otherwise see if we can optimize the "and" to a better pattern. 
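// In source terms (illustrative, names hypothetical), the "and" normalization
// above corresponds to:
int selectAnd(bool c0, bool c1, int x, int y) {
  return c0 ? (c1 ? x : y) : y; // == (c0 & c1) ? x : y
}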
- if (SDValue Combined = visitANDLike(N0, N1_0, N)) + if (SDValue Combined = visitANDLike(N0, N1_0, N)) { return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1, - N2); + N2, Flags); + } } } // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y @@ -7456,20 +8302,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // Create the actual or node if we can generate good code for it. if (!normalizeToSequence) { SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0); - return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, + N2_2, Flags); } // Otherwise see if we can optimize to a better pattern. if (SDValue Combined = visitORLike(N0, N2_0, N)) return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1, - N2_2); + N2_2, Flags); } } } - if (VT0 == MVT::i1) { - // select (not Cond), N1, N2 -> select Cond, N2, N1 - if (isBitwiseNot(N0)) - return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1); + // select (not Cond), N1, N2 -> select Cond, N2, N1 + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { + SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1); + SelectOp->setFlags(Flags); + return SelectOp; } // Fold selects based on a setcc into other things, such as min/max/abs. @@ -7481,7 +8329,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // select (fcmp gt x, y), x, y -> fmaxnum x, y // // This is OK if we don't care what happens if either operand is a NaN. - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2)) + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI)) if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC, TLI, DAG)) return FMinMax; @@ -7516,9 +8364,16 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } if (TLI.isOperationLegal(ISD::SELECT_CC, VT) || - (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) - return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2, - N0.getOperand(2)); + (!LegalOperations && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) { + // Any flags available in a select/setcc fold will be on the setcc as they + // migrated from fcmp + Flags = N0.getNode()->getFlags(); + SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, + N2, N0.getOperand(2)); + SelectNode->setFlags(Flags); + return SelectNode; + } return SimplifySelect(DL, N0, N1, N2); } @@ -7599,14 +8454,19 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { } SDValue DAGCombiner::visitMSCATTER(SDNode *N) { - if (Level >= AfterLegalizeTypes) - return SDValue(); - MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); SDValue Mask = MSC->getMask(); - SDValue Data = MSC->getValue(); + SDValue Data = MSC->getValue(); + SDValue Chain = MSC->getChain(); SDLoc DL(N); + // Zap scatters with a zero mask. + if (ISD::isBuildVectorAllZeros(Mask.getNode())) + return Chain; + + if (Level >= AfterLegalizeTypes) + return SDValue(); + // If the MSCATTER data type requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. 
This // prevents the type legalizer from unrolling SETCC into scalar comparisons @@ -7624,8 +8484,6 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); - SDValue Chain = MSC->getChain(); - EVT MemoryVT = MSC->getMemoryVT(); unsigned Alignment = MSC->getOriginalAlignment(); @@ -7658,15 +8516,20 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { } SDValue DAGCombiner::visitMSTORE(SDNode *N) { - if (Level >= AfterLegalizeTypes) - return SDValue(); - - MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); + MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); - SDValue Data = MST->getValue(); + SDValue Data = MST->getValue(); + SDValue Chain = MST->getChain(); EVT VT = Data.getValueType(); SDLoc DL(N); + // Zap masked stores with a zero mask. + if (ISD::isBuildVectorAllZeros(Mask.getNode())) + return Chain; + + if (Level >= AfterLegalizeTypes) + return SDValue(); + // If the MSTORE data type requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. This // prevents the type legalizer from unrolling SETCC into scalar comparisons @@ -7680,17 +8543,11 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { SDValue MaskLo, MaskHi, Lo, Hi; std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - SDValue Chain = MST->getChain(); SDValue Ptr = MST->getBasePtr(); EVT MemoryVT = MST->getMemoryVT(); unsigned Alignment = MST->getOriginalAlignment(); - // if Alignment is equal to the vector size, - // take the half of it for the second part - unsigned SecondHalfAlignment = - (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment; - EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); @@ -7712,7 +8569,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MMO = DAG.getMachineFunction().getMachineMemOperand( MST->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, MST->getAAInfo(), MST->getRanges()); Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, @@ -7728,13 +8585,17 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { } SDValue DAGCombiner::visitMGATHER(SDNode *N) { - if (Level >= AfterLegalizeTypes) - return SDValue(); - MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N); SDValue Mask = MGT->getMask(); SDLoc DL(N); + // Zap gathers with a zero mask. + if (ISD::isBuildVectorAllZeros(Mask.getNode())) + return CombineTo(N, MGT->getPassThru(), MGT->getChain()); + + if (Level >= AfterLegalizeTypes) + return SDValue(); + // If the MGATHER result requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. This // prevents the type legalizer from unrolling SETCC into scalar comparisons @@ -7805,13 +8666,17 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { } SDValue DAGCombiner::visitMLOAD(SDNode *N) { - if (Level >= AfterLegalizeTypes) - return SDValue(); - - MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); + MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); SDValue Mask = MLD->getMask(); SDLoc DL(N); + // Zap masked loads with a zero mask. 
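// A scalar model (illustrative, not the LLVM API) of why an all-zero mask
// makes the load a no-op: every lane takes the pass-through value.
#include <cstddef>
void maskedLoadModel(const int *Ptr, const bool *Mask, const int *PassThru,
                     int *Out, size_t N) {
  for (size_t I = 0; I != N; ++I)
    Out[I] = Mask[I] ? Ptr[I] : PassThru[I]; // all-false mask => PassThru
}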
+ if (ISD::isBuildVectorAllZeros(Mask.getNode())) + return CombineTo(N, MLD->getPassThru(), MLD->getChain()); + + if (Level >= AfterLegalizeTypes) + return SDValue(); + // If the MLOAD result requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. This // prevents the type legalizer from unrolling SETCC into scalar comparisons @@ -7839,12 +8704,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { EVT MemoryVT = MLD->getMemoryVT(); unsigned Alignment = MLD->getOriginalAlignment(); - // if Alignment is equal to the vector size, - // take the half of it for the second part - unsigned SecondHalfAlignment = - (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? - Alignment/2 : Alignment; - EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); @@ -7862,7 +8721,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { MMO = DAG.getMachineFunction().getMachineMemOperand( MLD->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT, @@ -7943,11 +8802,16 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); SDLoc DL(N); if (SDValue V = DAG.simplifySelect(N0, N1, N2)) return V; + // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1 + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) + return DAG.getSelect(DL, VT, F, N2, N1); + // Canonicalize integer abs. // vselect (setg[te] X, 0), X, -X -> // vselect (setgt X, -1), X, -X -> @@ -7987,11 +8851,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // This is OK if we don't care about what happens if either operand is a // NaN. 
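// Source-level shape (illustrative): assuming no NaNs and no signed zeros,
// the setcc/select pair below is equivalent to fminnum(a, b).
float minLike(float a, float b) {
  return a < b ? a : b; // -> fminnum(a, b) under nnan/nsz
}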
   //
-  EVT VT = N->getValueType(0);
-  if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
-                                                     N0.getOperand(1))) {
-    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+  if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
+                                                     N0.getOperand(1), TLI)) {
     if (SDValue FMinMax = combineMinNumMaxNum(
-            DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+            DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
       return FMinMax;
   }
 
@@ -8080,9 +8943,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
       return N2;
   } else if (SCC.getOpcode() == ISD::SETCC) {
     // Fold to a simpler select_cc
-    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
-                       SCC.getOperand(0), SCC.getOperand(1), N2, N3,
-                       SCC.getOperand(2));
+    SDValue SelectOp = DAG.getNode(
+        ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
+        SCC.getOperand(1), N2, N3, SCC.getOperand(2));
+    SelectOp->setFlags(SCC->getFlags());
+    return SelectOp;
   }
 }
 
@@ -8148,6 +9013,7 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
   unsigned Opcode = N->getOpcode();
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
+  SDLoc DL(N);
 
   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
           Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
@@ -8158,7 +9024,33 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
   // fold (zext c1) -> c1
   // fold (aext c1) -> c1
   if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(Opcode, SDLoc(N), VT, N0);
+    return DAG.getNode(Opcode, DL, VT, N0);
+
+  // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+  // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
+  // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+  if (N0->getOpcode() == ISD::SELECT) {
+    SDValue Op1 = N0->getOperand(1);
+    SDValue Op2 = N0->getOperand(2);
+    if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
+        (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
+      // For any_extend, choose sign extension of the constants to allow a
+      // possible further transform to sign_extend_inreg, i.e.:
+      //
+      // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
+      // t2: i64 = any_extend t1
+      // -->
+      // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
+      // -->
+      // t4: i64 = sign_extend_inreg t3
+      unsigned FoldOpc = Opcode;
+      if (FoldOpc == ISD::ANY_EXTEND)
+        FoldOpc = ISD::SIGN_EXTEND;
+      return DAG.getSelect(DL, VT, N0->getOperand(0),
+                           DAG.getNode(FoldOpc, DL, VT, Op1),
+                           DAG.getNode(FoldOpc, DL, VT, Op2));
+    }
+  }
 
   // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
   // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
@@ -8173,7 +9065,6 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
     unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
     SmallVector<SDValue, 8> Elts;
     unsigned NumElts = VT.getVectorNumElements();
-    SDLoc DL(N);
 
     // For zero-extensions, UNDEF elements still guarantee to have the upper
     // bits set to zero.
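// Illustrative source-level instance (names hypothetical) of the
// select-of-constants fold added above:
#include <cstdint>
int64_t extendSelect(bool c) {
  // The i8 select of constants is sign-extended to i64; the fold instead
  // selects directly between the extended constants -1LL and 0LL.
  int8_t t = c ? int8_t(-1) : int8_t(0);
  return t;
}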
@@ -8387,6 +9278,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { assert(N->getOpcode() == ISD::ZERO_EXTEND); EVT VT = N->getValueType(0); + EVT OrigVT = N->getOperand(0).getValueType(); + if (TLI.isZExtFree(OrigVT, VT)) + return SDValue(); // and/or/xor SDValue N0 = N->getOperand(0); @@ -8450,6 +9344,10 @@ SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { Load->getValueType(0), ExtLoad); CombineTo(Load, Trunc, ExtLoad.getValue(1)); } + + // N0 is dead at this point. + recursivelyDeleteUnusedNodes(N0.getNode()); + return SDValue(N,0); // Return N so it doesn't get rechecked! } @@ -8509,19 +9407,21 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, : ISD::isZEXTLoad(N0Node); if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) || !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse()) - return {}; + return SDValue(); LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); if ((LegalOperations || LN0->isVolatile() || VT.isVector()) && !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) - return {}; + return SDValue(); SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); Combiner.CombineTo(N, ExtLoad); DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + if (LN0->use_empty()) + Combiner.recursivelyDeleteUnusedNodes(LN0); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -8559,6 +9459,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, Combiner.CombineTo(N, ExtLoad); if (NoReplaceTrunc) { DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + Combiner.recursivelyDeleteUnusedNodes(LN0); } else { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); @@ -8804,6 +9705,25 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; + // Eliminate this sign extend by doing a negation in the destination type: + // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64) + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && + isNullOrNullSplat(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND && + TLI.isOperationLegalOrCustom(ISD::SUB, VT)) { + SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext); + } + // Eliminate this sign extend by doing a decrement in the destination type: + // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1) + if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && + isAllOnesOrAllOnesSplat(N0.getOperand(1)) && + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && + TLI.isOperationLegalOrCustom(ISD::ADD, VT)) { + SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); + } + return SDValue(); } @@ -9061,14 +9981,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) { SDValue ShAmt = N0.getOperand(1); - unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue(); if (N0.getOpcode() == ISD::SHL) { SDValue InnerZExt = N0.getOperand(0); // If the original shl may be shifting out bits, do not perform this // transformation. 
unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() - InnerZExt.getOperand(0).getValueSizeInBits(); - if (ShAmtVal > KnownZeroBits) + if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits)) return SDValue(); } @@ -9162,6 +10081,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { CombineTo(N, ExtLoad); if (NoReplaceTrunc) { DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + recursivelyDeleteUnusedNodes(LN0); } else { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); @@ -9185,6 +10105,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + recursivelyDeleteUnusedNodes(LN0); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -9574,14 +10495,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. if (N0.getOpcode() == ISD::SRL) { - if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) - if (ShAmt->getZExtValue()+EVTBits <= VTBits) { + if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) { // We can turn this into an SRA iff the input to the SRL is already sign // extended enough. unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); - if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) - return DAG.getNode(ISD::SRA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1)); + if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits) + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), + N0.getOperand(1)); } } @@ -9667,10 +10588,11 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + EVT SrcVT = N0.getValueType(); bool isLE = DAG.getDataLayout().isLittleEndian(); // noop truncate - if (N0.getValueType() == N->getValueType(0)) + if (SrcVT == VT) return N0; // fold (truncate (truncate x)) -> (truncate x) @@ -9740,7 +10662,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // trunc (select c, a, b) -> select c, (trunc a), (trunc b) if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) { - EVT SrcVT = N0.getValueType(); if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && TLI.isTruncateFree(SrcVT, VT)) { SDLoc SL(N0); @@ -9753,7 +10674,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits() if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && - (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && + (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) && TLI.isTypeDesirableForOp(ISD::SHL, VT)) { SDValue Amt = N0.getOperand(1); KnownBits Known = DAG.computeKnownBits(Amt); @@ -9771,6 +10692,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // Attempt to pre-truncate BUILD_VECTOR sources. 
+  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
+      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+    SDLoc DL(N);
+    EVT SVT = VT.getScalarType();
+    SmallVector<SDValue, 8> TruncOps;
+    for (const SDValue &Op : N0->op_values()) {
+      SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
+      TruncOps.push_back(TruncOp);
+    }
+    return DAG.getBuildVector(VT, DL, TruncOps);
+  }
+
   // Fold a series of buildvector, bitcast, and truncate if possible.
   // For example fold
   // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
@@ -9906,7 +10840,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   // When the adde's carry is not used.
   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
-      (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
+      // We only do this for ADDCARRY before operation legalization.
+      ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
+       TLI.isOperationLegal(N0.getOpcode(), VT))) {
     SDLoc SL(N);
     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
@@ -10070,14 +11006,17 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
     return DAG.getUNDEF(VT);
 
   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
-  // Only do this before legalize types, since we might create an illegal
-  // scalar type. Even if we knew we wouldn't create an illegal scalar type
-  // we can only do this before legalize ops, since the target maybe
-  // depending on the bitcast.
+  // Only do this before legalize types, unless both types are integer and the
+  // scalar type is legal. Only do this before legalize ops, since the target
+  // may be depending on the bitcast.
   // First check to see if this is all constant.
-  if (!LegalTypes &&
+  // TODO: Support FP bitcasts after legalize types.
+  if (VT.isVector() &&
+      (!LegalTypes ||
+       (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
+        TLI.isTypeLegal(VT.getVectorElementType()))) &&
       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
-      VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
+      cast<BuildVectorSDNode>(N0)->isConstant())
     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
                                              VT.getVectorElementType());
 
@@ -10113,18 +11052,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
   // as we assume software couldn't rely on the number of accesses of an
   // illegal type.
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isOperationLegal(ISD::LOAD, VT)) && - TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { + TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - unsigned OrigAlign = LN0->getAlignment(); - bool Fast = false; - if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, - LN0->getAddressSpace(), OrigAlign, &Fast) && - Fast) { + if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG, + *LN0->getMemOperand())) { SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), OrigAlign, + LN0->getPointerInfo(), LN0->getAlignment(), LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); return Load; @@ -11071,15 +12006,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) + isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations), Flags); + GetNegatedExpression(N1, DAG, LegalOperations, + ForCodeSize), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) + isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N1, - GetNegatedExpression(N0, DAG, LegalOperations), Flags); + GetNegatedExpression(N0, DAG, LegalOperations, + ForCodeSize), Flags); auto isFMulNegTwo = [](SDValue FMul) { if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) @@ -11105,8 +12042,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // Selection pass has a hard time dealing with FP constants. bool AllowNewConst = (Level < AfterLegalizeDAG); - // If 'unsafe math' or nnan is enabled, fold lots of things. - if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) { + // If nnan is enabled, fold lots of things. + if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) { // If allowed, fold (fadd (fneg x), x) -> 0.0 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, DL, VT); @@ -11246,16 +12183,20 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0 == N1) { // (fsub x, x) -> 0.0 - if (Options.UnsafeFPMath || Flags.hasNoNaNs()) + if (Options.NoNaNsFPMath || Flags.hasNoNaNs()) return DAG.getConstantFP(0.0f, DL, VT); } // (fsub -0.0, N1) -> -N1 + // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the + // FSUB does not specify the sign bit of a NaN. Also note that for + // the same reason, the inverse transform is not safe, unless fast math + // flags are in play. 
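// Illustration of the NaN caveat in the NOTE above: fneg always flips the
// sign bit (NaN included), while fsub(-0.0, NaN) may return the NaN with
// either sign, so only the fsub -> fneg direction is unconditionally safe.
float fsubFromNegZero(float x) {
  return -0.0f - x; // may be folded to fneg(x); equal to -x for non-NaN x
}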
if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) - return GetNegatedExpression(N1, DAG, LegalOperations); + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) + return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } @@ -11273,9 +12214,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) return DAG.getNode(ISD::FADD, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations), Flags); + GetNegatedExpression(N1, DAG, LegalOperations, + ForCodeSize), Flags); // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { @@ -11319,7 +12261,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (Options.UnsafeFPMath || + if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) || (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) { // fold (fmul A, 0) -> 0 if (N1CFP && N1CFP->isZero()) @@ -11361,14 +12303,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FNEG, DL, VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, + ForCodeSize)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, + ForCodeSize)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FMUL, DL, VT, - GetNegatedExpression(N0, DAG, LegalOperations), - GetNegatedExpression(N1, DAG, LegalOperations), + GetNegatedExpression(N0, DAG, LegalOperations, + ForCodeSize), + GetNegatedExpression(N1, DAG, LegalOperations, + ForCodeSize), Flags); } } @@ -11506,7 +12452,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // fma (fneg x), K, y -> fma x -K, y if (N0.getOpcode() == ISD::FNEG && (TLI.isOperationLegal(ISD::ConstantFP, VT) || - (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) { + (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, + ForCodeSize)))) { return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2); } @@ -11541,22 +12488,33 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { + // TODO: Limit this transform based on optsize/minsize - it always creates at + // least 1 extra instruction. But the perf win may be substantial enough + // that only minsize should restrict this. bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; const SDNodeFlags Flags = N->getFlags(); if (!UnsafeMath && !Flags.hasAllowReciprocal()) return SDValue(); - // Skip if current node is a reciprocal. + // Skip if current node is a reciprocal/fneg-reciprocal. 
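// For illustration, the reassociation this combine enables at the source
// level (valid only with reciprocal fast-math, per the check above):
// N divisions by one value become one division plus N multiplies.
void divideAll(float *V, int N, float D) {
  float Recip = 1.0f / D; // single fdiv
  for (int I = 0; I != N; ++I)
    V[I] *= Recip; // fmul per use, replacing an fdiv
}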
SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - if (N0CFP && N0CFP->isExactlyValue(1.0)) + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true); + if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0))) return SDValue(); // Exit early if the target does not want this transform or if there can't // possibly be enough uses of the divisor to make the transform worthwhile. SDValue N1 = N->getOperand(1); unsigned MinUses = TLI.combineRepeatedFPDivisors(); - if (!MinUses || N1->use_size() < MinUses) + + // For splat vectors, scale the number of uses by the splat factor. If we can + // convert the division into a scalar op, that will likely be much faster. + unsigned NumElts = 1; + EVT VT = N->getValueType(0); + if (VT.isVector() && DAG.isSplatValue(N1)) + NumElts = VT.getVectorNumElements(); + + if (!MinUses || (N1->use_size() * NumElts) < MinUses) return SDValue(); // Find all FDIV users of the same divisor. @@ -11573,10 +12531,9 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { // Now that we have the actual number of divisor uses, make sure it meets // the minimum threshold specified by the target. - if (Users.size() < MinUses) + if ((Users.size() * NumElts) < MinUses) return SDValue(); - EVT VT = N->getValueType(0); SDLoc DL(N); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags); @@ -11619,6 +12576,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; + if (SDValue V = combineRepeatedFPDivisors(N)) + return V; + if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP) { @@ -11634,7 +12594,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // backend)... we should handle this gracefully after Legalize. // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) || TLI.isOperationLegal(ISD::ConstantFP, VT) || - TLI.isFPImmLegal(Recip, VT))) + TLI.isFPImmLegal(Recip, VT, ForCodeSize))) return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getConstantFP(Recip, DL, VT), Flags); } @@ -11692,21 +12652,22 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, + ForCodeSize)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, + ForCodeSize)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, - GetNegatedExpression(N0, DAG, LegalOperations), - GetNegatedExpression(N1, DAG, LegalOperations), + GetNegatedExpression(N0, DAG, LegalOperations, + ForCodeSize), + GetNegatedExpression(N1, DAG, LegalOperations, + ForCodeSize), Flags); } } - if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N)) - return CombineRepeatedDivisors; - return SDValue(); } @@ -11838,18 +12799,24 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags); } - // Try to convert x ** (1/4) into square roots. + // Try to convert x ** (1/4) and x ** (3/4) into square roots. // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case. 
// TODO: This could be extended (using a target hook) to handle smaller // power-of-2 fractional exponents. - if (ExponentC->getValueAPF().isExactlyValue(0.25)) { + bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25); + bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75); + if (ExponentIs025 || ExponentIs075) { // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0. // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN. + // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0. + // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN. // For regular numbers, rounding may cause the results to differ. // Therefore, we require { nsz ninf afn } for this transform. // TODO: We could select out the special cases if we don't have nsz/ninf. SDNodeFlags Flags = N->getFlags(); - if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || + + // We only need no signed zeros for the 0.25 case. + if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() || !Flags.hasApproximateFuncs()) return SDValue(); @@ -11859,13 +12826,17 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { // Assume that libcalls are the smallest code. // TODO: This restriction should probably be lifted for vectors. - if (DAG.getMachineFunction().getFunction().optForSize()) + if (DAG.getMachineFunction().getFunction().hasOptSize()) return SDValue(); // pow(X, 0.25) --> sqrt(sqrt(X)) SDLoc DL(N); SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags); - return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags); + SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags); + if (ExponentIs025) + return SqrtSqrt; + // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X)) + return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags); } return SDValue(); @@ -11911,6 +12882,10 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); + // [us]itofp(undef) = 0, because the result value is bounded. + if (N0.isUndef()) + return DAG.getConstantFP(0.0, SDLoc(N), VT); + // fold (sint_to_fp c1) -> c1fp if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values @@ -11968,6 +12943,10 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); + // [us]itofp(undef) = 0, because the result value is bounded. 
+ if (N0.isUndef()) + return DAG.getConstantFP(0.0, SDLoc(N), VT); + // fold (uint_to_fp c1) -> c1fp if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values @@ -12051,6 +13030,10 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + // fold (fp_to_sint undef) -> undef + if (N0.isUndef()) + return DAG.getUNDEF(VT); + // fold (fp_to_sint c1fp) -> c1 if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); @@ -12062,6 +13045,10 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + // fold (fp_to_uint undef) -> undef + if (N0.isUndef()) + return DAG.getUNDEF(VT); + // fold (fp_to_uint c1fp) -> c1 if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); @@ -12250,8 +13237,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options)) - return GetNegatedExpression(N0, DAG, LegalOperations); + &DAG.getTarget().Options, ForCodeSize)) + return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. @@ -12287,7 +13274,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { APFloat CVal = CFP1->getValueAPF(); CVal.changeSign(); if (Level >= AfterLegalizeDAG && - (TLI.isFPImmLegal(CVal, VT) || + (TLI.isFPImmLegal(CVal, VT, ForCodeSize) || TLI.isOperationLegal(ISD::ConstantFP, VT))) return DAG.getNode( ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), @@ -12556,6 +13543,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, TargetLowering::AddrMode AM; if (N->getOpcode() == ISD::ADD) { + AM.HasBaseReg = true; ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (Offset) // [reg +/- imm] @@ -12564,6 +13552,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, // [reg +/- reg] AM.Scale = 1; } else if (N->getOpcode() == ISD::SUB) { + AM.HasBaseReg = true; ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (Offset) // [reg +/- imm] @@ -12653,7 +13642,13 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Check #2. if (!isLoad) { SDValue Val = cast<StoreSDNode>(N)->getValue(); - if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode())) + + // Would require a copy. + if (Val == BasePtr) + return false; + + // Would create a cycle. + if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode())) return false; } @@ -13190,7 +14185,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (LD->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. - SDValue BetterChain = FindBetterChain(N, Chain); + SDValue BetterChain = FindBetterChain(LD, Chain); // If there is a better chain. if (Chain != BetterChain) { @@ -13378,7 +14373,7 @@ struct LoadedSlice { /// Get the alignment of the load used for this slice. 
unsigned getAlignment() const { unsigned Alignment = Origin->getAlignment(); - unsigned Offset = getOffsetFromBase(); + uint64_t Offset = getOffsetFromBase(); if (Offset != 0) Alignment = MinAlign(Alignment, Alignment + Offset); return Alignment; @@ -13500,9 +14495,11 @@ struct LoadedSlice { assert(DAG && "Missing context"); const TargetLowering &TLI = DAG->getTargetLoweringInfo(); EVT ResVT = Use->getValueType(0); - const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); + const TargetRegisterClass *ResRC = + TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent()); const TargetRegisterClass *ArgRC = - TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); + TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(), + Use->getOperand(0)->isDivergent()); if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) return false; @@ -13826,7 +14823,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; // For narrowing to be valid, it must be the case that the load the - // immediately preceeding memory operation before the store. + // immediately preceding memory operation before the store. if (LD == Chain.getNode()) ; // ok. else if (Chain->getOpcode() == ISD::TokenFactor && @@ -14039,11 +15036,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { /// load / store operations if the target deems the transformation profitable. SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); - SDValue Chain = ST->getChain(); SDValue Value = ST->getValue(); if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && - Value.hasOneUse() && - Chain == SDValue(Value.getNode(), 1)) { + Value.hasOneUse()) { LoadSDNode *LD = cast<LoadSDNode>(Value); EVT VT = LD->getMemoryVT(); if (!VT.isFloatingPoint() || @@ -14073,7 +15068,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { LD->getPointerInfo(), LDAlign); SDValue NewST = - DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(), + DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(), ST->getPointerInfo(), STAlign); AddToWorklist(NewLD.getNode()); @@ -14171,14 +15166,14 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, Visited.insert(StoreNodes[i].MemNode); } - // don't include nodes that are children + // don't include nodes that are children or repeated nodes. for (unsigned i = 0; i < NumStores; ++i) { - if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0) + if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second) Chains.push_back(StoreNodes[i].MemNode->getChain()); } assert(Chains.size() > 0 && "Chain should have generated a chain"); - return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains); + return DAG.getTokenFactor(StoreDL, Chains); } bool DAGCombiner::MergeStoresOfConstantsOrVecElts( @@ -14372,15 +15367,19 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!Ld->hasNUsesOfValue(1, 0)) return; - // The memory operands must not be volatile. + // The memory operands must not be volatile/indexed. if (Ld->isVolatile() || Ld->isIndexed()) return; } auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { + // The memory operands must not be volatile/indexed. 
if (Other->isVolatile() || Other->isIndexed()) return false; - SDValue Val = peekThroughBitcasts(Other->getValue()); + // Don't mix temporal stores with non-temporal stores. + if (St->isNonTemporal() != Other->isNonTemporal()) + return false; + SDValue OtherBC = peekThroughBitcasts(Other->getValue()); // Allow merging constants of different types as integers. bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT()) : Other->getMemoryVT() != MemVT; @@ -14388,16 +15387,19 @@ void DAGCombiner::getStoreMergeCandidates( if (NoTypeMatch) return false; // The Load's Base Ptr must also match - if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) { - auto LPtr = BaseIndexOffset::match(OtherLd, DAG); + if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) { + BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG); if (LoadVT != OtherLd->getMemoryVT()) return false; // Loads must only have one use. if (!OtherLd->hasNUsesOfValue(1, 0)) return false; - // The memory operands must not be volatile. + // The memory operands must not be volatile/indexed. if (OtherLd->isVolatile() || OtherLd->isIndexed()) return false; + // Don't mix temporal loads with non-temporal loads. + if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal()) + return false; if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) return false; } else @@ -14406,17 +15408,17 @@ void DAGCombiner::getStoreMergeCandidates( if (IsConstantSrc) { if (NoTypeMatch) return false; - if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val))) + if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC))) return false; } if (IsExtractVecSrc) { // Do not merge truncated stores here. if (Other->isTruncatingStore()) return false; - if (!MemVT.bitsEq(Val.getValueType())) + if (!MemVT.bitsEq(OtherBC.getValueType())) return false; - if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT && - Val.getOpcode() != ISD::EXTRACT_SUBVECTOR) + if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT && + OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR) return false; } Ptr = BaseIndexOffset::match(Other, DAG); @@ -14441,9 +15443,11 @@ void DAGCombiner::getStoreMergeCandidates( RootNode = St->getChain().getNode(); + unsigned NumNodesExplored = 0; if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) { RootNode = Ldn->getChain().getNode(); - for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I) + for (auto I = RootNode->use_begin(), E = RootNode->use_end(); + I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored) if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2) if (I2.getOperandNo() == 0) @@ -14454,7 +15458,8 @@ void DAGCombiner::getStoreMergeCandidates( StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } else - for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I) + for (auto I = RootNode->use_begin(), E = RootNode->use_end(); + I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored) if (I.getOperandNo() == 0) if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { BaseIndexOffset Ptr; @@ -14551,6 +15556,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { isa<ConstantFPSDNode>(StoredVal); bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR); + bool IsNonTemporalStore = St->isNonTemporal(); + bool IsNonTemporalLoad = + IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal(); if (!IsConstantSrc && !IsLoadSrc && 
!IsExtractVecSrc) return false; @@ -14652,8 +15660,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (TLI.isTypeLegal(StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFast) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFast) && IsFast) { LastIntegerTrunc = false; LastLegalType = i + 1; @@ -14664,8 +15672,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFast) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), + &IsFast) && IsFast) { LastIntegerTrunc = true; LastLegalType = i + 1; @@ -14683,8 +15692,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, - FirstStoreAlign, &IsFast) && + TLI.allowsMemoryAccess( + Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && IsFast) LastLegalVectorType = i + 1; } @@ -14755,8 +15764,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, - FirstStoreAlign, &IsFast) && + TLI.allowsMemoryAccess(Context, DL, Ty, + *FirstInChain->getMemOperand(), &IsFast) && IsFast) NumStoresToMerge = i + 1; } @@ -14847,7 +15856,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); - unsigned FirstLoadAS = FirstLoad->getAddressSpace(); unsigned FirstLoadAlign = FirstLoad->getAlignment(); // Scan the memory operations on the chain and find the first @@ -14887,11 +15895,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { bool IsFastSt, IsFastLd; if (TLI.isTypeLegal(StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalVectorType = i + 1; } @@ -14901,11 +15909,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; DoIntegerTruncate = false; @@ -14920,11 +15928,12 @@ bool 
DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), + &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; DoIntegerTruncate = true; @@ -14994,26 +16003,32 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); AddToWorklist(NewStoreChain.getNode()); - MachineMemOperand::Flags MMOFlags = + MachineMemOperand::Flags LdMMOFlags = isDereferenceable ? MachineMemOperand::MODereferenceable : MachineMemOperand::MONone; + if (IsNonTemporalLoad) + LdMMOFlags |= MachineMemOperand::MONonTemporal; + + MachineMemOperand::Flags StMMOFlags = + IsNonTemporalStore ? MachineMemOperand::MONonTemporal + : MachineMemOperand::MONone; SDValue NewLoad, NewStore; if (UseVectorTy || !DoIntegerTruncate) { NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), - FirstLoadAlign, MMOFlags); + FirstLoadAlign, LdMMOFlags); NewStore = DAG.getStore( NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstStoreAlign); + FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags); } else { // This must be the truncstore/extload case EVT ExtendedTy = TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), JointMemOpVT, - FirstLoadAlign, MMOFlags); + FirstLoadAlign, LdMMOFlags); NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), @@ -15168,16 +16183,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // illegal type. if (((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegal(ISD::STORE, SVT)) && - TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) { - unsigned OrigAlign = ST->getAlignment(); - bool Fast = false; - if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT, - ST->getAddressSpace(), OrigAlign, &Fast) && - Fast) { - return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, - ST->getPointerInfo(), OrigAlign, - ST->getMemOperand()->getFlags(), ST->getAAInfo()); - } + TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, + DAG, *ST->getMemOperand())) { + return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, + ST->getPointerInfo(), ST->getAlignment(), + ST->getMemOperand()->getFlags(), ST->getAAInfo()); } } @@ -15205,6 +16215,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (SDValue NewST = TransformFPLoadStorePair(N)) return NewST; + // Try transforming several stores into STORE (BSWAP). + if (SDValue Store = MatchStoreCombine(ST)) + return Store; + if (ST->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes, on this store and any // adjacent stores. 
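// (Annotation, not part of the patch: a sketch of the source-level pattern the
// new MatchStoreCombine hook targets; the exact shapes, byte order, and
// legality checks are up to the matcher and the target.)

// Four adjacent byte stores of 'v', highest byte first. On a little-endian
// target this is equivalent to a single 32-bit store of bswap(v), which is
// the STORE (BSWAP) form the combine can produce.
void store_be32(unsigned char *p, unsigned int v) {
  p[0] = (unsigned char)(v >> 24);
  p[1] = (unsigned char)(v >> 16);
  p[2] = (unsigned char)(v >> 8);
  p[3] = (unsigned char)(v);
}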
@@ -15221,23 +16235,22 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       Value.getValueType().isInteger() &&
       (!isa<ConstantSDNode>(Value) ||
        !cast<ConstantSDNode>(Value)->isOpaque())) {
+    APInt TruncDemandedBits =
+        APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+                             ST->getMemoryVT().getScalarSizeInBits());
+
     // See if we can simplify the input to this truncstore with knowledge that
     // only the low bits are being used. For example:
     // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
-    SDValue Shorter = DAG.GetDemandedBits(
-        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
-                                    ST->getMemoryVT().getScalarSizeInBits()));
+    SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
     AddToWorklist(Value.getNode());
-    if (Shorter.getNode())
-      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
-                               Ptr, ST->getMemoryVT(), ST->getMemOperand());
+    if (Shorter)
+      return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
+                               ST->getMemOperand());
 
     // Otherwise, see if we can simplify the operation with
     // SimplifyDemandedBits, which only works if the value has a single use.
-    if (SimplifyDemandedBits(
-            Value,
-            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
-                                 ST->getMemoryVT().getScalarSizeInBits()))) {
+    if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
       // Re-visit the store if anything changed and the store hasn't been merged
       // with another node (N is deleted) SimplifyDemandedBits will add Value's
       // node back to the worklist if necessary, but we also need to re-visit
@@ -15263,25 +16276,55 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
     if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
-        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
-        ST->getMemoryVT() == ST1->getMemoryVT()) {
-      // If this is a store followed by a store with the same value to the same
-      // location, then the store is dead/noop.
-      if (ST1->getValue() == Value) {
-        // The store is dead, remove it.
+        !ST1->isVolatile()) {
+      if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
+          ST->getMemoryVT() == ST1->getMemoryVT()) {
+        // If this is a store followed by a store with the same value to the
+        // same location, then the store is dead/noop.
         return Chain;
       }
 
-      // If this is a store who's preceeding store to the same location
-      // and no one other node is chained to that store we can effectively
-      // drop the store. Do not remove stores to undef as they may be used as
-      // data sinks.
       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
           !ST1->getBasePtr().isUndef()) {
-        // ST1 is fully overwritten and can be elided. Combine with it's chain
-        // value.
-        CombineTo(ST1, ST1->getChain());
-        return SDValue();
+        const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
+        const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
+        unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
+        unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
+        // If the preceding store writes to a subset of the current store's
+        // location and no other node is chained to that store, we can
+        // effectively drop the preceding store. Do not remove stores to undef
+        // as they may be used as data sinks.
+        if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
+          CombineTo(ST1, ST1->getChain());
+          return SDValue();
+        }
+
+        // If ST stores to a subset of the preceding store's write set, we may
+        // be able to fold ST's value into the preceding stored value.
As we know + // the other uses of ST1's chain are unconcerned with ST, this folding + // will not affect those nodes. + int64_t BitOffset; + if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize, + BitOffset)) { + SDValue ChainValue = ST1->getValue(); + if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) { + if (auto *C = dyn_cast<ConstantSDNode>(Value)) { + APInt Val = C1->getAPIntValue(); + APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize); + // FIXME: Handle Big-endian mode. + if (!DAG.getDataLayout().isBigEndian()) { + Val.insertBits(InsertVal, BitOffset); + SDValue NewSDVal = + DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(), + C1->isTargetOpcode(), C1->isOpaque()); + SDNode *NewST1 = DAG.UpdateNodeOperands( + ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2), + ST1->getOperand(3)); + return CombineTo(ST, SDValue(NewST1, 0)); + } + } + } + } // End ST subset of ST1 case. } } } @@ -15299,7 +16342,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Always perform this optimization before types are legal. If the target // prefers, also try this after legalization to catch stores that were created // by intrinsics or other nodes. - if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) { + if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) { while (true) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so @@ -15333,6 +16376,54 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return ReduceLoadOpStoreWidth(N); } +SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { + const auto *LifetimeEnd = cast<LifetimeSDNode>(N); + if (!LifetimeEnd->hasOffset()) + return SDValue(); + + const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(), + LifetimeEnd->getOffset(), false); + + // We walk up the chains to find stores. + SmallVector<SDValue, 8> Chains = {N->getOperand(0)}; + while (!Chains.empty()) { + SDValue Chain = Chains.back(); + Chains.pop_back(); + if (!Chain.hasOneUse()) + continue; + switch (Chain.getOpcode()) { + case ISD::TokenFactor: + for (unsigned Nops = Chain.getNumOperands(); Nops;) + Chains.push_back(Chain.getOperand(--Nops)); + break; + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: + // We can forward past any lifetime start/end that can be proven not to + // alias the node. + if (!isAlias(Chain.getNode(), N)) + Chains.push_back(Chain.getOperand(0)); + break; + case ISD::STORE: { + StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain); + if (ST->isVolatile() || ST->isIndexed()) + continue; + const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG); + // If we store purely within object bounds just before its lifetime ends, + // we can remove the store. + if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase, + ST->getMemoryVT().getStoreSizeInBits())) { + LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump(); + dbgs() << "\nwithin LIFETIME_END of : "; + LifetimeEndBase.dump(); dbgs() << "\n"); + CombineTo(ST, ST->getChain()); + return SDValue(N, 0); + } + } + } + } + return SDValue(); +} + /// For the instruction sequence of store below, F and I values /// are bundled together as an i64 value before being stored into memory. 
/// Sometimes it is more efficient to generate separate stores for F and I,
@@ -15616,7 +16707,9 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
     Offset = DAG.getNode(
         ISD::MUL, DL, PtrType, Offset,
         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
-    MPI = OriginalLoad->getPointerInfo();
+    // Discard the pointer info except the address space because the memory
+    // operand can't represent this new access since the offset is variable.
+    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
   }
   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
@@ -15668,14 +16761,15 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
 /// the math/logic after an extract element of a vector.
 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
                                        bool LegalOperations) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue Vec = ExtElt->getOperand(0);
   SDValue Index = ExtElt->getOperand(1);
   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
-  if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
+  if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
+      Vec.getNode()->getNumValues() != 1)
     return SDValue();
 
   // Targets may want to avoid this to prevent an expensive register transfer.
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (!TLI.shouldScalarizeBinop(Vec))
     return SDValue();
@@ -16073,7 +17167,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                            ArrayRef<int> VectorMask,
                                            SDValue VecIn1, SDValue VecIn2,
-                                           unsigned LeftIdx) {
+                                           unsigned LeftIdx, bool DidSplitVec) {
   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
@@ -16081,17 +17175,12 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
   EVT InVT1 = VecIn1.getValueType();
   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
 
-  unsigned Vec2Offset = 0;
   unsigned NumElems = VT.getVectorNumElements();
   unsigned ShuffleNumElems = NumElems;
 
-  // In case both the input vectors are extracted from same base
-  // vector we do not need extra addend (Vec2Offset) while
-  // computing shuffle mask.
-  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
-      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
-      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
-    Vec2Offset = InVT1.getVectorNumElements();
+  // If we artificially split a vector in two already, then the offsets in the
+  // operands will all be based off of VecIn1, even those in VecIn2.
+  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
 
   // We can't generate a shuffle node with mismatched input and output types.
   // Try to make the types match the type of the output.
@@ -16214,23 +17303,29 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
   // The build vector contains some number of undef elements and exactly
   // one other element. That other element must be a zero-extended scalar
   // extracted from a vector at a constant index to turn this into a shuffle.
+  // Also, require that the build vector does not implicitly truncate/extend
+  // its elements.
   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
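// (Annotation, not part of the patch: the shape matched below is, e.g. with
// X : v16i8,
//   (v8i16 build_vector undef, (i16 zext (i8 extractelt X, C)), undef, ...)
// which can be formed as a shuffle of X against a zero vector that selects
// the extracted byte and zeros elsewhere. The added checks require the zext
// to produce exactly the build_vector element width and X to have the same
// total size as the result, so the shuffle operands line up.)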
+  EVT VT = BV->getValueType(0);
   SDValue Zext = BV->getOperand(ZextElt);
   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
+      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
+      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
     return SDValue();
 
-  // The zero-extend must be a multiple of the source size.
+  // The zero-extend must be a multiple of the source size, and we must be
+  // building a vector of the same size as the source of the extract element.
   SDValue Extract = Zext.getOperand(0);
   unsigned DestSize = Zext.getValueSizeInBits();
   unsigned SrcSize = Extract.getValueSizeInBits();
-  if (DestSize % SrcSize != 0)
+  if (DestSize % SrcSize != 0 ||
+      Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
     return SDValue();
 
   // Create a shuffle mask that will combine the extracted element with zeros
   // and undefs.
-  int ZextRatio = DestSize / SrcSize;
   int NumMaskElts = NumBVOps * ZextRatio;
   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
   for (int i = 0; i != NumMaskElts; ++i) {
@@ -16260,7 +17355,7 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
   SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
                                       ShufMask);
-  return DAG.getBitcast(BV->getValueType(0), Shuf);
+  return DAG.getBitcast(VT, Shuf);
 }
 
 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
@@ -16316,7 +17411,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
       return SDValue();
 
     SDValue ExtractedFromVec = Op.getOperand(0);
-    APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+    const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
       return SDValue();
 
@@ -16344,6 +17439,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
   // vector, then split the vector efficiently based on the maximum
   // vector access index and adjust the VectorMask and
   // VecIn accordingly.
+  bool DidSplitVec = false;
   if (VecIn.size() == 2) {
     unsigned MaxIndex = 0;
     unsigned NearestPow2 = 0;
@@ -16374,6 +17470,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
       VecIn.pop_back();
       VecIn.push_back(VecIn1);
       VecIn.push_back(VecIn2);
+      DidSplitVec = true;
 
       for (unsigned i = 0; i < NumElems; i++) {
         if (VectorMask[i] <= 0)
@@ -16411,7 +17508,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
 
     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
-                                                VecRight, LeftIdx))
+                                                VecRight, LeftIdx, DidSplitVec))
       Shuffles.push_back(Shuffle);
     else
       return SDValue();
@@ -16477,18 +17574,20 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
 
 // Try to turn a build vector of zero extends of extract vector elts into
 // a vector zero extend and possibly an extract subvector.
-// TODO: Support sign extend or any extend?
+// TODO: Support sign extend?
 // TODO: Allow undef elements?
-// TODO: Don't require the extracts to start at element 0.
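// (Annotation, not part of the patch: as an illustrative example of the fold
// below, with X : v8i16
//   (v4i32 build_vector (zext (extractelt X, 4)), (zext (extractelt X, 5)),
//                       (zext (extractelt X, 6)), (zext (extractelt X, 7)))
// becomes
//   (v4i32 zero_extend (v4i16 extract_subvector X, 4))
// and any_extend is used instead when none of the matched elements was an
// explicit zero_extend.)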
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { if (LegalOperations) return SDValue(); EVT VT = N->getValueType(0); + bool FoundZeroExtend = false; SDValue Op0 = N->getOperand(0); auto checkElem = [&](SDValue Op) -> int64_t { - if (Op.getOpcode() == ISD::ZERO_EXTEND && + unsigned Opc = Op.getOpcode(); + FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND); + if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) && Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0)) if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1))) @@ -16520,7 +17619,8 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { SDLoc DL(N); In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In, Op0.getOperand(0).getOperand(1)); - return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In); + return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL, + VT, In); } SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { @@ -16885,14 +17985,14 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } - unsigned IdentityIndex = i * PartNumElem; - ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); // The extract index must be constant. if (!CS) return SDValue(); // Check that we are reading from the identity index. - if (CS->getZExtValue() != IdentityIndex) + unsigned IdentityIndex = i * PartNumElem; + if (CS->getAPIntValue() != IdentityIndex) return SDValue(); } @@ -16902,12 +18002,59 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } +static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, + SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue BinOp = Extract->getOperand(0); + unsigned BinOpcode = BinOp.getOpcode(); + if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1) + return SDValue(); + + SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1); + SDValue Index = Extract->getOperand(1); + EVT VT = Extract->getValueType(0); + + // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find + // if the source subvector is the same type as the one being extracted. + auto GetSubVector = [VT, Index](SDValue V) -> SDValue { + if (V.getOpcode() == ISD::INSERT_SUBVECTOR && + V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) { + return V.getOperand(1); + } + auto *IndexC = dyn_cast<ConstantSDNode>(Index); + if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && + V.getOperand(0).getValueType() == VT && + (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) { + uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements(); + return V.getOperand(SubIdx); + } + return SDValue(); + }; + SDValue Sub0 = GetSubVector(Bop0); + SDValue Sub1 = GetSubVector(Bop1); + + // TODO: We could handle the case where only 1 operand is being inserted by + // creating an extract of the other operand, but that requires checking + // number of uses and/or costs. + if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT)) + return SDValue(); + + // We are inserting both operands of the wide binop only to extract back + // to the narrow vector size. 
Eliminate all of the insert/extract: + // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y + return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1, + BinOp->getFlags()); +} + /// If we are extracting a subvector produced by a wide binary operator try /// to use a narrow binary operator and/or avoid concatenation and extraction. static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share // some of these bailouts with other transforms. + if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG)) + return V; + // The extract index must be a constant, so we can map it to a concat operand. auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); if (!ExtractIndexC) @@ -16915,8 +18062,10 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // We are looking for an optionally bitcasted wide vector binary operator // feeding an extract subvector. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0)); - if (!ISD::isBinaryOp(BinOp.getNode())) + unsigned BOpcode = BinOp.getOpcode(); + if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1) return SDValue(); // The binop must be a vector type, so we can extract some fraction of it. @@ -16945,8 +18094,6 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // Bail out if the target does not support a narrower version of the binop. EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(), WideNumElts / NarrowingRatio); - unsigned BOpcode = BinOp.getOpcode(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT)) return SDValue(); @@ -16986,35 +18133,35 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // We need at least one concatenation operation of a binop operand to make // this transform worthwhile. The concat must double the input vector sizes. - // TODO: Should we also handle INSERT_SUBVECTOR patterns? - SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0)); - SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1)); - bool ConcatL = - LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2; - bool ConcatR = - RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2; - if (!ConcatL && !ConcatR) + auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue { + if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2) + return V.getOperand(ConcatOpNum); return SDValue(); + }; + SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0))); + SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1))); + + if (SubVecL || SubVecR) { + // If a binop operand was not the result of a concat, we must extract a + // half-sized operand for our new narrow binop: + // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN + // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC) + // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN + SDLoc DL(Extract); + SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT); + SDValue X = SubVecL ? 
DAG.getBitcast(NarrowBVT, SubVecL) + : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(0), IndexC); - // If one of the binop operands was not the result of a concat, we must - // extract a half-sized operand for our new narrow binop. - SDLoc DL(Extract); - - // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN - // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N) - // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN - SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum)) - : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, - BinOp.getOperand(0), - DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT)); + SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR) + : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(1), IndexC); - SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum)) - : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, - BinOp.getOperand(1), - DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT)); + SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y); + return DAG.getBitcast(VT, NarrowBinOp); + } - SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y); - return DAG.getBitcast(VT, NarrowBinOp); + return SDValue(); } /// If we are extracting a subvector from a wide vector load, convert to a @@ -17052,7 +18199,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { return NewLd; } -SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { +SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); @@ -17064,14 +18211,51 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG)) return NarrowLoad; + // Combine an extract of an extract into a single extract_subvector. 
+ // ext (ext X, C), 0 --> ext X, C + SDValue Index = N->getOperand(1); + if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && + V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) { + if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(), + V.getConstantOperandVal(1)) && + TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) { + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0), + V.getOperand(1)); + } + } + + // Try to move vector bitcast after extract_subv by scaling extraction index: + // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') + if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST && + V.getOperand(0).getValueType().isVector()) { + SDValue SrcOp = V.getOperand(0); + EVT SrcVT = SrcOp.getValueType(); + unsigned SrcNumElts = SrcVT.getVectorNumElements(); + unsigned DestNumElts = V.getValueType().getVectorNumElements(); + if ((SrcNumElts % DestNumElts) == 0) { + unsigned SrcDestRatio = SrcNumElts / DestNumElts; + unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio; + EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), + NewExtNumElts); + if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { + unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio; + SDLoc DL(N); + SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL); + SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, + V.getOperand(0), NewIndex); + return DAG.getBitcast(NVT, NewExtract); + } + } + // TODO - handle (DestNumElts % SrcNumElts) == 0 + } + // Combine: // (extract_subvec (concat V1, V2, ...), i) // Into: // Vi if possible // Only operand 0 is checked as 'concat' assumes all inputs of the same // type. - if (V.getOpcode() == ISD::CONCAT_VECTORS && - isa<ConstantSDNode>(N->getOperand(1)) && + if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) && V.getOperand(0).getValueType() == NVT) { unsigned Idx = N->getConstantOperandVal(1); unsigned NumElems = NVT.getVectorNumElements(); @@ -17084,7 +18268,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // If the input is a build vector. Try to make a smaller build vector. if (V.getOpcode() == ISD::BUILD_VECTOR) { - if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) { + if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) { EVT InVT = V.getValueType(); unsigned ExtractSize = NVT.getSizeInBits(); unsigned EltSize = InVT.getScalarSizeInBits(); @@ -17092,26 +18276,27 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (ExtractSize % EltSize == 0) { unsigned NumElems = ExtractSize / EltSize; EVT EltVT = InVT.getVectorElementType(); - EVT ExtractVT = NumElems == 1 ? EltVT : - EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems); + EVT ExtractVT = NumElems == 1 ? EltVT + : EVT::getVectorVT(*DAG.getContext(), + EltVT, NumElems); if ((Level < AfterLegalizeDAG || (NumElems == 1 || TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) && (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { - unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) / - EltSize; + unsigned IdxVal = IdxC->getZExtValue(); + IdxVal *= NVT.getScalarSizeInBits(); + IdxVal /= EltSize; + if (NumElems == 1) { SDValue Src = V->getOperand(IdxVal); if (EltVT != Src.getValueType()) Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src); - return DAG.getBitcast(NVT, Src); } // Extract the pieces from the original build_vector. 
- SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), - makeArrayRef(V->op_begin() + IdxVal, - NumElems)); + SDValue BuildVec = DAG.getBuildVector( + ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems)); return DAG.getBitcast(NVT, BuildVec); } } @@ -17126,9 +18311,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); // Only handle cases where both indexes are constants. - auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); + auto *ExtIdx = dyn_cast<ConstantSDNode>(Index); auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2)); - if (InsIdx && ExtIdx) { // Combine: // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) @@ -17141,7 +18325,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return DAG.getNode( ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), - N->getOperand(1)); + Index); } } @@ -17154,6 +18338,53 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); } +/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles +/// followed by concatenation. Narrow vector ops may have better performance +/// than wide ops, and this can unlock further narrowing of other vector ops. +/// Targets can invert this transform later if it is not profitable. +static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, + SelectionDAG &DAG) { + SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1); + if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 || + N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 || + !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef()) + return SDValue(); + + // Split the wide shuffle mask into halves. Any mask element that is accessing + // operand 1 is offset down to account for narrowing of the vectors. + ArrayRef<int> Mask = Shuf->getMask(); + EVT VT = Shuf->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + unsigned HalfNumElts = NumElts / 2; + SmallVector<int, 16> Mask0(HalfNumElts, -1); + SmallVector<int, 16> Mask1(HalfNumElts, -1); + for (unsigned i = 0; i != NumElts; ++i) { + if (Mask[i] == -1) + continue; + int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts; + if (i < HalfNumElts) + Mask0[i] = M; + else + Mask1[i - HalfNumElts] = M; + } + + // Ask the target if this is a valid transform. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + HalfNumElts); + if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) || + !TLI.isShuffleMaskLegal(Mask1, HalfVT)) + return SDValue(); + + // shuffle (concat X, undef), (concat Y, undef), Mask --> + // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1) + SDValue X = N0.getOperand(0), Y = N1.getOperand(0); + SDLoc DL(Shuf); + SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0); + SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1); +} + // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, // or turn a shuffle of a single concat into simpler shuffle then concat. 
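// (Annotation, not part of the patch: for example, with A, B, C, D : v4i32,
//   shuffle (concat A, B), (concat C, D), <4,5,6,7,12,13,14,15>
// copies whole 4-element blocks, so it partitions directly into
//   concat_vectors B, D
// with no shuffle of the parts left at all.)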
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { @@ -17163,20 +18394,24 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + ArrayRef<int> Mask = SVN->getMask(); SmallVector<SDValue, 4> Ops; EVT ConcatVT = N0.getOperand(0).getValueType(); unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); unsigned NumConcats = NumElts / NumElemsPerConcat; + auto IsUndefMaskElt = [](int i) { return i == -1; }; + // Special case: shuffle(concat(A,B)) can be more efficiently represented // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high // half vector elements. if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() && - std::all_of(SVN->getMask().begin() + NumElemsPerConcat, - SVN->getMask().end(), [](int i) { return i == -1; })) { - N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), - makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); + llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat), + IsUndefMaskElt)) { + N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), + N0.getOperand(1), + Mask.slice(0, NumElemsPerConcat)); N1 = DAG.getUNDEF(ConcatVT); return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); } @@ -17184,35 +18419,32 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { // Look at every vector that's inserted. We're looking for exact // subvector-sized copies from a concatenated vector for (unsigned I = 0; I != NumConcats; ++I) { - // Make sure we're dealing with a copy. unsigned Begin = I * NumElemsPerConcat; - bool AllUndef = true, NoUndef = true; - for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) { - if (SVN->getMaskElt(J) >= 0) - AllUndef = false; - else - NoUndef = false; + ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat); + + // Make sure we're dealing with a copy. + if (llvm::all_of(SubMask, IsUndefMaskElt)) { + Ops.push_back(DAG.getUNDEF(ConcatVT)); + continue; } - if (NoUndef) { - if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) + int OpIdx = -1; + for (int i = 0; i != (int)NumElemsPerConcat; ++i) { + if (IsUndefMaskElt(SubMask[i])) + continue; + if ((SubMask[i] % (int)NumElemsPerConcat) != i) return SDValue(); - - for (unsigned J = 1; J != NumElemsPerConcat; ++J) - if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) - return SDValue(); - - unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; - if (FirstElt < N0.getNumOperands()) - Ops.push_back(N0.getOperand(FirstElt)); - else - Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); - - } else if (AllUndef) { - Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); - } else { // Mixed with general masks and undefs, can't do optimization. 
- return SDValue(); + int EltOpIdx = SubMask[i] / NumElemsPerConcat; + if (0 <= OpIdx && EltOpIdx != OpIdx) + return SDValue(); + OpIdx = EltOpIdx; } + assert(0 <= OpIdx && "Unknown concat_vectors op"); + + if (OpIdx < (int)N0.getNumOperands()) + Ops.push_back(N0.getOperand(OpIdx)); + else + Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands())); } return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); @@ -17278,8 +18510,8 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, if (S.getOpcode() == ISD::BUILD_VECTOR) { Op = S.getOperand(Idx); } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) { - assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index."); - Op = S.getOperand(0); + SDValue Op0 = S.getOperand(0); + Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType()); } else { // Operand can't be combined - bail out. return SDValue(); @@ -17433,11 +18665,17 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, // If splat-mask contains undef elements, we need to be careful about // introducing undef's in the folded mask which are not the result of composing // the masks of the shuffles. -static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask, - ShuffleVectorSDNode *Splat, - SelectionDAG &DAG) { +static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, + SelectionDAG &DAG) { + if (!Shuf->getOperand(1).isUndef()) + return SDValue(); + auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0)); + if (!Splat || !Splat->isSplat()) + return SDValue(); + + ArrayRef<int> ShufMask = Shuf->getMask(); ArrayRef<int> SplatMask = Splat->getMask(); - assert(UserMask.size() == SplatMask.size() && "Mask length mismatch"); + assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch"); // Prefer simplifying to the splat-shuffle, if possible. This is legal if // every undef mask element in the splat-shuffle has a corresponding undef @@ -17463,13 +18701,13 @@ static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask, return false; return true; }; - if (CanSimplifyToExistingSplat(UserMask, SplatMask)) - return SDValue(Splat, 0); + if (CanSimplifyToExistingSplat(ShufMask, SplatMask)) + return Shuf->getOperand(0); // Create a new shuffle with a mask that is composed of the two shuffles' // masks. SmallVector<int, 32> NewMask; - for (int Idx : UserMask) + for (int Idx : ShufMask) NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]); return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat), @@ -17555,6 +18793,34 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, Op1, Op0.getOperand(1), NewInsIndex); } +/// If we have a unary shuffle of a shuffle, see if it can be folded away +/// completely. This has the potential to lose undef knowledge because the first +/// shuffle may not have an undef mask element where the second one does. So +/// only call this after doing simplifications based on demanded elements. +static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) { + // shuf (shuf0 X, Y, Mask0), undef, Mask + auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0)); + if (!Shuf0 || !Shuf->getOperand(1).isUndef()) + return SDValue(); + + ArrayRef<int> Mask = Shuf->getMask(); + ArrayRef<int> Mask0 = Shuf0->getMask(); + for (int i = 0, e = (int)Mask.size(); i != e; ++i) { + // Ignore undef elements. 
+ if (Mask[i] == -1) + continue; + assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value"); + + // Is the element of the shuffle operand chosen by this shuffle the same as + // the element chosen by the shuffle operand itself? + if (Mask0[Mask[i]] != Mask0[i]) + return SDValue(); + } + // Every element of this shuffle is identical to the result of the previous + // shuffle, so we can replace this value. + return Shuf->getOperand(0); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -17604,19 +18870,35 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG)) return InsElt; - // A shuffle of a single vector that is a splat can always be folded. - if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0)) - if (N1->isUndef() && N0Shuf->isSplat()) - return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG); + // A shuffle of a single vector that is a splatted value can always be folded. + if (SDValue V = combineShuffleOfSplatVal(SVN, DAG)) + return V; // If it is a splat, check if the argument vector is another splat or a // build_vector. if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { - SDNode *V = N0.getNode(); + int SplatIndex = SVN->getSplatIndex(); + if (TLI.isExtractVecEltCheap(VT, SplatIndex) && + TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) { + // splat (vector_bo L, R), Index --> + // splat (scalar_bo (extelt L, Index), (extelt R, Index)) + SDValue L = N0.getOperand(0), R = N0.getOperand(1); + SDLoc DL(N); + EVT EltVT = VT.getScalarType(); + SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL); + SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index); + SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index); + SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, + N0.getNode()->getFlags()); + SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO); + SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0); + return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask); + } // If this is a bit convert that changes the element type of the vector but // not the number of vector elements, look through it. Be careful not to // look though conversions that change things like v4f32 to v2f64. + SDNode *V = N0.getNode(); if (V->getOpcode() == ISD::BITCAST) { SDValue ConvInput = V->getOperand(0); if (ConvInput.getValueType().isVector() && @@ -17649,7 +18931,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return N0; // Canonicalize any other splat as a build_vector. - const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); + SDValue Splatted = V->getOperand(SplatIndex); SmallVector<SDValue, 8> Ops(NumElts, Splatted); SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops); @@ -17665,6 +18947,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SimplifyDemandedVectorElts(SDValue(N, 0))) return SDValue(N, 0); + // This is intentionally placed after demanded elements simplification because + // it could eliminate knowledge of undef elements created by this shuffle. + if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN)) + return ShufOp; + // Match shuffles that can be converted to any_vector_extend_in_reg. if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations)) return V; @@ -17704,7 +18991,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { NewMask.push_back(M < 0 ? 
-1 : Scale * M + s);
       return NewMask;
     };
-
+
     SDValue BC0 = peekThroughOneUseBitcasts(N0);
     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
       EVT SVT = VT.getScalarType();
@@ -17884,6 +19171,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
   }
 
+  if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
+    return V;
+
   return SDValue();
 }
 
@@ -18006,7 +19296,44 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
   if (!isa<ConstantSDNode>(N2))
     return SDValue();
 
-  unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+  uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+  // Push subvector bitcasts to the output, adjusting the index as we go.
+  // insert_subvector(bitcast(v), bitcast(s), c1)
+  //  -> bitcast(insert_subvector(v, s, c2))
+  if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
+      N1.getOpcode() == ISD::BITCAST) {
+    SDValue N0Src = peekThroughBitcasts(N0);
+    SDValue N1Src = peekThroughBitcasts(N1);
+    EVT N0SrcSVT = N0Src.getValueType().getScalarType();
+    EVT N1SrcSVT = N1Src.getValueType().getScalarType();
+    if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
+        N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
+      EVT NewVT;
+      SDLoc DL(N);
+      SDValue NewIdx;
+      MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+      LLVMContext &Ctx = *DAG.getContext();
+      unsigned NumElts = VT.getVectorNumElements();
+      unsigned EltSizeInBits = VT.getScalarSizeInBits();
+      if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
+        unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
+        NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
+        NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
+      } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
+        unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
+        if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
+          NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
+          NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
+        }
+      }
+      if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
+        SDValue Res = DAG.getBitcast(NewVT, N0Src);
+        Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
+        return DAG.getBitcast(VT, Res);
+      }
+    }
+  }
 
   // Canonicalize insert_subvector dag nodes.
   // Example:
@@ -18070,6 +19397,36 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
   return SDValue();
 }
 
+SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N0.getValueType();
+  unsigned Opcode = N->getOpcode();
+
+  // VECREDUCE over a 1-element vector is just an extract.
+  if (VT.getVectorNumElements() == 1) {
+    SDLoc dl(N);
+    SDValue Res = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
+        DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+    if (Res.getValueType() != N->getValueType(0))
+      Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
+    return Res;
+  }
+
+  // On a boolean vector, an and/or reduction is the same as a umin/umax
+  // reduction. Convert them if the latter is legal while the former isn't.
+  if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
+    unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
+                             ?
ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX; + if (!TLI.isOperationLegalOrCustom(Opcode, VT) && + TLI.isOperationLegalOrCustom(NewOpcode, VT) && + DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits()) + return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0); + } + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -18161,6 +19518,53 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { return SDValue(); } +/// If a vector binop is performed on splat values, it may be profitable to +/// extract, scalarize, and insert/splat. +static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + unsigned Opcode = N->getOpcode(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // TODO: Remove/replace the extract cost check? If the elements are available + // as scalars, then there may be no extract cost. Should we ask if + // inserting a scalar back into a vector is cheap instead? + int Index0, Index1; + SDValue Src0 = DAG.getSplatSourceVector(N0, Index0); + SDValue Src1 = DAG.getSplatSourceVector(N1, Index1); + if (!Src0 || !Src1 || Index0 != Index1 || + Src0.getValueType().getVectorElementType() != EltVT || + Src1.getValueType().getVectorElementType() != EltVT || + !TLI.isExtractVecEltCheap(VT, Index0) || + !TLI.isOperationLegalOrCustom(Opcode, EltVT)) + return SDValue(); + + SDLoc DL(N); + SDValue IndexC = + DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())); + SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC); + SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC); + SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()); + + // If all lanes but 1 are undefined, no need to splat the scalar result. + // TODO: Keep track of undefs and use that info in the general case. + if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() && + count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 && + count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) { + // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) --> + // build_vec ..undef, (bo X, Y), undef... + SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT)); + Ops[Index0] = ScalarBO; + return DAG.getBuildVector(VT, DL, Ops); + } + + // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index + SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO); + return DAG.getBuildVector(VT, DL, Ops); +} + /// Visit a binary vector operation, like ADD. SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { assert(N->getValueType(0).isVector() && @@ -18169,34 +19573,63 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Ops[] = {LHS, RHS}; + EVT VT = N->getValueType(0); + unsigned Opcode = N->getOpcode(); // See if we can constant fold the vector operation. if (SDValue Fold = DAG.FoldConstantVectorArithmetic( - N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) + Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) return Fold; - // Type legalization might introduce new shuffles in the DAG. 
- // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) - // -> (shuffle (VBinOp (A, B)), Undef, Mask). - if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && - isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && - LHS.getOperand(1).isUndef() && - RHS.getOperand(1).isUndef()) { - ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); - ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); - - if (SVN0->getMask().equals(SVN1->getMask())) { - EVT VT = N->getValueType(0); - SDValue UndefVector = LHS.getOperand(1); - SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, - LHS.getOperand(0), RHS.getOperand(0), - N->getFlags()); - AddUsersToWorklist(N); - return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, - SVN0->getMask()); + // Move unary shuffles with identical masks after a vector binop: + // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask) + // --> shuffle (VBinOp A, B), Undef, Mask + // This does not require type legality checks because we are creating the + // same types of operations that are in the original sequence. We do have to + // restrict ops like integer div that have immediate UB (e.g., div-by-zero) + // though. This code is adapted from the identical transform in instcombine. + if (Opcode != ISD::UDIV && Opcode != ISD::SDIV && + Opcode != ISD::UREM && Opcode != ISD::SREM && + Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) { + auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS); + auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS); + if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) && + LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() && + (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) { + SDLoc DL(N); + SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0), + RHS.getOperand(0), N->getFlags()); + SDValue UndefV = LHS.getOperand(1); + return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask()); + } + } + + // The following pattern is likely to emerge with vector reduction ops. Moving + // the binary operation ahead of insertion may allow using a narrower vector + // instruction that has better performance than the wide version of the op: + // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z + if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() && + RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() && + LHS.getOperand(2) == RHS.getOperand(2) && + (LHS.hasOneUse() || RHS.hasOneUse())) { + SDValue X = LHS.getOperand(1); + SDValue Y = RHS.getOperand(1); + SDValue Z = LHS.getOperand(2); + EVT NarrowVT = X.getValueType(); + if (NarrowVT == Y.getValueType() && + TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { + // (binop undef, undef) may not return undef, so compute that result. + SDLoc DL(N); + SDValue VecC = + DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT)); + SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z); } } + if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) + return V; + return SDValue(); } @@ -18214,13 +19647,16 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, // Check to see if we got a select_cc back (to turn into setcc/select). // Otherwise, just return whatever node we got back, like fabs.
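The shuffle-after-binop move in SimplifyVBinOp above relies on a lane-wise identity: applying the op first and shuffling afterwards selects the same values. Division and remainder are excluded because hoisting the op ahead of the shuffle can execute a lane (for instance a zero divisor) that the mask would have discarded. A standalone scalar sketch of the identity, in plain C++ rather than SelectionDAG code:

#include <array>
#include <cassert>

int main() {
  const std::array<int, 4> A = {1, 2, 3, 4}, B = {10, 20, 30, 40};
  const std::array<int, 4> Mask = {3, 1, 2, 0}; // no undef lanes in this model

  std::array<int, 4> ShufA, ShufB, BinOpAB;
  for (int i = 0; i != 4; ++i) {
    ShufA[i] = A[Mask[i]];    // shuffle A, undef, Mask
    ShufB[i] = B[Mask[i]];    // shuffle B, undef, Mask
    BinOpAB[i] = A[i] + B[i]; // VBinOp A, B
  }
  for (int i = 0; i != 4; ++i)
    assert(ShufA[i] + ShufB[i] == BinOpAB[Mask[i]]); // shuffle of the VBinOp
  return 0;
}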
if (SCC.getOpcode() == ISD::SELECT_CC) { + const SDNodeFlags Flags = N0.getNode()->getFlags(); SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), N0.getValueType(), SCC.getOperand(0), SCC.getOperand(1), - SCC.getOperand(4)); + SCC.getOperand(4), Flags); AddToWorklist(SETCC.getNode()); - return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, - SCC.getOperand(2), SCC.getOperand(3)); + SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, + SCC.getOperand(2), SCC.getOperand(3)); + SelectNode->setFlags(Flags); + return SelectNode; } return SCC; @@ -18305,6 +19741,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // locations are not in the default address space. LLD->getPointerInfo().getAddrSpace() != 0 || RLD->getPointerInfo().getAddrSpace() != 0 || + // We can't produce a CMOV of a TargetFrameIndex since we won't + // generate the address generation required. + LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex || + RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex || !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), LLD->getBasePtr().getValueType())) return false; @@ -18501,8 +19941,8 @@ SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( // If a constant can be materialized without loads, this does not make sense. if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal || - TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) || - TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) + TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) || + TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize)) return SDValue(); // If both constants have multiple uses, then we won't need to do an extra @@ -18547,20 +19987,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, if (N2 == N3) return N2; EVT CmpOpVT = N0.getValueType(); + EVT CmpResVT = getSetCCResultType(CmpOpVT); EVT VT = N2.getValueType(); auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); // Determine if the condition we're dealing with is constant. - SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL, - false); - if (SCC.getNode()) AddToWorklist(SCC.getNode()); - - if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { - // fold select_cc true, x, y -> x - // fold select_cc false, x, y -> y - return !SCCC->isNullValue() ? N2 : N3; + if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) { + AddToWorklist(SCC.getNode()); + if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) { + // fold select_cc true, x, y -> x + // fold select_cc false, x, y -> y + return !(SCCC->isNullValue()) ? N2 : N3; + } } if (SDValue V = @@ -18621,7 +20061,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, SDValue Temp, SCC; // zext (setcc n0, n1) if (LegalTypes) { - SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC); + SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC); if (VT.bitsLT(SCC.getValueType())) Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT); else @@ -18644,36 +20084,6 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, getShiftAmountTy(Temp.getValueType()))); } - // Check to see if this is an integer abs. 
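For reference, the select_cc block removed just below matched a branchless integer abs. The identity it expanded to (Y = sra(X, bits-1); (X + Y) ^ Y) is easy to check in isolation; a standalone sketch, assuming arithmetic right shift on signed values (guaranteed since C++20 and the behavior of mainstream compilers before that):

#include <cassert>
#include <cstdint>

int32_t absViaShift(int32_t X) {
  int32_t Y = X >> 31; // sign mask: 0 for X >= 0, -1 (all ones) for X < 0
  return (X + Y) ^ Y;  // X unchanged when Y == 0; two's-complement -X when Y == -1
}

int main() {
  assert(absViaShift(5) == 5);
  assert(absViaShift(-7) == 7);
  assert(absViaShift(0) == 0);
  return 0;
}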
- // select_cc setg[te] X, 0, X, -X -> - // select_cc setgt X, -1, X, -X -> - // select_cc setl[te] X, 0, -X, X -> - // select_cc setlt X, 1, -X, X -> - // Y = sra (X, size(X)-1); xor (add (X, Y), Y) - if (N1C) { - ConstantSDNode *SubC = nullptr; - if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || - (N1C->isAllOnesValue() && CC == ISD::SETGT)) && - N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) - SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); - else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || - (N1C->isOne() && CC == ISD::SETLT)) && - N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) - SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); - - if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) { - SDLoc DL(N0); - SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0, - DAG.getConstant(CmpOpVT.getSizeInBits() - 1, - DL, - getShiftAmountTy(CmpOpVT))); - SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift); - AddToWorklist(Shift.getNode()); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift); - } - } - // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X) // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X) // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X) @@ -18728,7 +20138,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue DAGCombiner::BuildSDIV(SDNode *N) { // when optimising for minimum size, we don't want to expand a div to a mul // and a shift. - if (DAG.getMachineFunction().getFunction().optForMinSize()) + if (DAG.getMachineFunction().getFunction().hasMinSize()) return SDValue(); SmallVector<SDNode *, 8> Built; @@ -18769,7 +20179,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { SDValue DAGCombiner::BuildUDIV(SDNode *N) { // when optimising for minimum size, we don't want to expand a div to a mul // and a shift. - if (DAG.getMachineFunction().getFunction().optForMinSize()) + if (DAG.getMachineFunction().getFunction().hasMinSize()) return SDValue(); SmallVector<SDNode *, 8> Built; @@ -18821,7 +20231,6 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { AddToWorklist(Est.getNode()); if (Iterations) { - EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); @@ -18977,7 +20386,6 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, if (!Reciprocal) { // The estimate is now completely wrong if the input was exactly 0.0 or // possibly a denormal. Force the answer to 0.0 for those cases. - EVT VT = Op.getValueType(); SDLoc DL(Op); EVT CCVT = getSetCCResultType(VT); ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; @@ -19020,79 +20428,95 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { } /// Return true if there is any possibility that the two addresses overlap. -bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { - // If they are the same then they must be aliases. - if (Op0->getBasePtr() == Op1->getBasePtr()) return true; +bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { - // If they are both volatile then they cannot be reordered. - if (Op0->isVolatile() && Op1->isVolatile()) return true; + struct MemUseCharacteristics { + bool IsVolatile; + SDValue BasePtr; + int64_t Offset; + Optional<int64_t> NumBytes; + MachineMemOperand *MMO; + }; - // If one operation reads from invariant memory, and the other may store, they - // cannot alias. 
These should really be checking the equivalent of mayWrite, - // but it only matters for memory nodes other than load /store. - if (Op0->isInvariant() && Op1->writeMem()) - return false; + auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics { + if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) { + int64_t Offset = 0; + if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset())) + Offset = (LSN->getAddressingMode() == ISD::PRE_INC) + ? C->getSExtValue() + : (LSN->getAddressingMode() == ISD::PRE_DEC) + ? -1 * C->getSExtValue() + : 0; + return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/, + Optional<int64_t>(LSN->getMemoryVT().getStoreSize()), + LSN->getMemOperand()}; + } + if (const auto *LN = cast<LifetimeSDNode>(N)) + return {false /*isVolatile*/, LN->getOperand(1), + (LN->hasOffset()) ? LN->getOffset() : 0, + (LN->hasOffset()) ? Optional<int64_t>(LN->getSize()) + : Optional<int64_t>(), + (MachineMemOperand *)nullptr}; + // Default. + return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/, + Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr}; + }; - if (Op1->isInvariant() && Op0->writeMem()) - return false; + MemUseCharacteristics MUC0 = getCharacteristics(Op0), + MUC1 = getCharacteristics(Op1); - unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize(); - unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize(); - - // Check for BaseIndexOffset matching. - BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG); - BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG); - int64_t PtrDiff; - if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) { - if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) - return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0)); - - // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be - // able to calculate their relative offset if at least one arises - // from an alloca. However, these allocas cannot overlap and we - // can infer there is no alias. - if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase())) - if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) { - MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); - // If the base are the same frame index but the we couldn't find a - // constant offset, (indices are different) be conservative. - if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) || - !MFI.isFixedObjectIndex(B->getIndex()))) - return false; - } + // If they are to the same address, then they must be aliases. + if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr && + MUC0.Offset == MUC1.Offset) + return true; - bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase()); - bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase()); - bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase()); - bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase()); - bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase()); - bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase()); - - // If of mismatched base types or checkable indices we can check - // they do not alias. - if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) || - (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) && - (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) + // If they are both volatile then they cannot be reordered. 
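The base/offset/size triples collected above reduce alias queries over a common base pointer to a half-open interval test. A standalone sketch of that test (illustrative names, not the BaseIndexOffset API):

#include <cassert>
#include <cstdint>

// Accesses at [Off0, Off0+N0) and [Off1, Off1+N1) from the same base
// pointer can alias only if the half-open intervals intersect.
bool mayOverlap(int64_t Off0, int64_t N0, int64_t Off1, int64_t N1) {
  return !(Off0 + N0 <= Off1 || Off1 + N1 <= Off0);
}

int main() {
  assert(!mayOverlap(0, 4, 4, 4)); // adjacent 4-byte accesses: no alias
  assert(mayOverlap(0, 8, 4, 4));  // an 8-byte access covers the second one
  return 0;
}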
+ if (MUC0.IsVolatile && MUC1.IsVolatile) + return true; + + if (MUC0.MMO && MUC1.MMO) { + if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || + (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) return false; } + // Try to prove that there is aliasing, or that there is no aliasing. Either + // way, we can return now. If nothing can be proved, proceed with more tests. + bool IsAlias; + if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes, + DAG, IsAlias)) + return IsAlias; + + // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if + // either is not known. + if (!MUC0.MMO || !MUC1.MMO) + return true; + + // If one operation reads from invariant memory, and the other may store, they + // cannot alias. These should really be checking the equivalent of mayWrite, + // but it only matters for memory nodes other than load/store. + if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || + (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) + return false; + // If we know required SrcValue1 and SrcValue2 have relatively large // alignment compared to the size and offset of the access, we may be able // to prove they do not alias. This check is conservative for now to catch // cases created by splitting vector types. - int64_t SrcValOffset0 = Op0->getSrcValueOffset(); - int64_t SrcValOffset1 = Op1->getSrcValueOffset(); - unsigned OrigAlignment0 = Op0->getOriginalAlignment(); - unsigned OrigAlignment1 = Op1->getOriginalAlignment(); + int64_t SrcValOffset0 = MUC0.MMO->getOffset(); + int64_t SrcValOffset1 = MUC1.MMO->getOffset(); + unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment(); + unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment(); if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && - NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) { + MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() && + *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; // There is no overlap between these relatively aligned accesses of // similar size. Return no alias. - if ((OffAlign0 + NumBytes0) <= OffAlign1 || - (OffAlign1 + NumBytes1) <= OffAlign0) + if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 || + (OffAlign1 + *MUC1.NumBytes) <= OffAlign0) return false; } @@ -19105,17 +20529,16 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { UseAA = false; #endif - if (UseAA && AA && - Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { + if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); - int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset; - int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset; - AliasResult AAResult = - AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0, - UseTBAA ? Op0->getAAInfo() : AAMDNodes()), - MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1, - UseTBAA ? Op1->getAAInfo() : AAMDNodes()) ); + int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset; + int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset; + AliasResult AAResult = AA->alias( + MemoryLocation(MUC0.MMO->getValue(), Overlap0, + UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()), + MemoryLocation(MUC1.MMO->getValue(), Overlap1, + UseTBAA ?
MUC1.MMO->getAAInfo() : AAMDNodes())); if (AAResult == NoAlias) return false; } @@ -19132,18 +20555,64 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallPtrSet<SDNode *, 16> Visited; // Visited node set. // Get alias information for node. - bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile(); + const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile(); // Starting off. Chains.push_back(OriginalChain); unsigned Depth = 0; + // Attempt to improve the chain by a single step. + std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool { + switch (C.getOpcode()) { + case ISD::EntryToken: + // No need to mark EntryToken. + C = SDValue(); + return true; + case ISD::LOAD: + case ISD::STORE: { + // Get alias information for C. + bool IsOpLoad = isa<LoadSDNode>(C.getNode()) && + !cast<LSBaseSDNode>(C.getNode())->isVolatile(); + if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) { + // Look further up the chain. + C = C.getOperand(0); + return true; + } + // Alias, so stop here. + return false; + } + + case ISD::CopyFromReg: + // Always forward past CopyFromReg. + C = C.getOperand(0); + return true; + + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: { + // We can forward past any lifetime start/end that can be proven not to + // alias the memory access. + if (!isAlias(N, C.getNode())) { + // Look further up the chain. + C = C.getOperand(0); + return true; + } + return false; + } + default: + return false; + } + }; + // Look at each chain and determine if it is an alias. If so, add it to the // aliases list. If not, then continue up the chain looking for the next // candidate. while (!Chains.empty()) { SDValue Chain = Chains.pop_back_val(); + // Don't bother if we've seen Chain before. + if (!Visited.insert(Chain.getNode()).second) + continue; + // For TokenFactor nodes, look at each operand and only continue up the // chain until we reach the depth limit. // @@ -19156,58 +20625,30 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, return; } - // Don't bother if we've been before. - if (!Visited.insert(Chain.getNode()).second) - continue; - - switch (Chain.getOpcode()) { - case ISD::EntryToken: - // Entry token is ideal chain operand, but handled in FindBetterChain. - break; - - case ISD::LOAD: - case ISD::STORE: { - // Get alias information for Chain. - bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) && - !cast<LSBaseSDNode>(Chain.getNode())->isVolatile(); - - // If chain is alias then stop here. - if (!(IsLoad && IsOpLoad) && - isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) { - Aliases.push_back(Chain); - } else { - // Look further up the chain. - Chains.push_back(Chain.getOperand(0)); - ++Depth; - } - break; - } - - case ISD::TokenFactor: + if (Chain.getOpcode() == ISD::TokenFactor) { // We have to check each of the operands of the token factor for "small" // token factors, so we queue them up. Adding the operands to the queue // (stack) in reverse order maintains the original order and increases the // likelihood that getNode will find a matching token factor (CSE.) if (Chain.getNumOperands() > 16) { Aliases.push_back(Chain); - break; + continue; } for (unsigned n = Chain.getNumOperands(); n;) Chains.push_back(Chain.getOperand(--n)); ++Depth; - break; - - case ISD::CopyFromReg: - // Forward past CopyFromReg.
- Chains.push_back(Chain.getOperand(0)); + continue; + } + // Everything else. + if (ImproveChain(Chain)) { + // Updated chain found; consider the new chain if one exists. + if (Chain.getNode()) + Chains.push_back(Chain); ++Depth; - break; - - default: - // For all other instructions we will just have to take what we can get. - Aliases.push_back(Chain); - break; + continue; } + // No improved chain possible; treat as an alias. + Aliases.push_back(Chain); } } @@ -19232,13 +20673,15 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return Aliases[0]; // Construct a custom tailored token factor. - return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); + return DAG.getTokenFactor(SDLoc(N), Aliases); } +namespace { // TODO: Replace with std::monostate when we move to C++17. struct UnitT { } Unit; bool operator==(const UnitT &, const UnitT &) { return true; } bool operator!=(const UnitT &, const UnitT &) { return false; } +} // namespace // This function tries to collect a bunch of potentially interesting // nodes to improve the chains of, all at once. This might seem @@ -19349,7 +20792,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { if (AddNewChain) TFOps.insert(TFOps.begin(), NewChain); - SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps); + SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps); CombineTo(St, TF); AddToWorklist(STChain); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index a9a3c44ea0c9..22c23ba877e8 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1,9 +1,8 @@ //===- FastISel.cpp - Implementation of the FastISel class ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -782,7 +781,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, unsigned Reg = getRegForValue(Val); if (!Reg) return false; - Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); } } return true; } @@ -831,8 +830,8 @@ bool FastISel::selectStackmap(const CallInst *I) { const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); for (unsigned i = 0; ScratchRegs[i]; ++i) Ops.push_back(MachineOperand::CreateReg( - ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, - /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false, + /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true)); // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); @@ -942,7 +941,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { assert(CLI.NumResultRegs == 0 && "Unexpected result register."); CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64)); CLI.NumResultRegs = 1; - Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true)); + Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true)); } // Add the <id> and <numBytes> constants.
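The GatherAllAliases rewrite above is a standard visited-set worklist walk: pop a chain node, skip it if already seen, step past it when ImproveChain proves it harmless, and record it as an alias otherwise. Stripped of the SelectionDAG types, the shape is roughly this (Node, Blocking, and Pred are stand-ins, not LLVM APIs):

#include <unordered_set>
#include <vector>

struct Node { Node *Pred = nullptr; bool Blocking = false; };

// Walk predecessor chains; step past nodes proven harmless, record the rest.
std::vector<Node *> gatherBlockers(Node *Start) {
  std::vector<Node *> Work{Start}, Blockers;
  std::unordered_set<Node *> Visited;
  while (!Work.empty()) {
    Node *N = Work.back();
    Work.pop_back();
    if (!Visited.insert(N).second)
      continue;                // already handled this chain node
    if (!N->Blocking && N->Pred) {
      Work.push_back(N->Pred); // the "ImproveChain" step: look further up
      continue;
    }
    if (N->Blocking)
      Blockers.push_back(N);   // can't step past it: record as an alias
  }
  return Blockers;
}

int main() {
  Node A, B, C;
  B.Pred = &A;
  C.Pred = &B;
  B.Blocking = true;
  return gatherBlockers(&C).size() == 1 ? 0 : 1; // stops at B, never visits A? no: visits A only via B's Pred, which is not pushed
}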
@@ -991,13 +990,13 @@ bool FastISel::selectPatchpoint(const CallInst *I) { unsigned Reg = getRegForValue(I->getArgOperand(i)); if (!Reg) return false; - Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); } } // Push the arguments from the call instruction. for (auto Reg : CLI.OutRegs) - Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); // Push live variables for the stack map. if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs)) @@ -1011,13 +1010,13 @@ bool FastISel::selectPatchpoint(const CallInst *I) { const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); for (unsigned i = 0; ScratchRegs[i]; ++i) Ops.push_back(MachineOperand::CreateReg( - ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, - /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false, + /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true)); // Add implicit defs (return values). for (auto Reg : CLI.InRegs) - Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true, - /*IsImpl=*/true)); + Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/true, + /*isImp=*/true)); // Insert the patchpoint instruction before the call generated by the target. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc, @@ -1045,9 +1044,9 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) { return true; // don't do anything to this instruction. SmallVector<MachineOperand, 8> Ops; Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)), - /*IsDef=*/false)); + /*isDef=*/false)); Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)), - /*IsDef=*/false)); + /*isDef=*/false)); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::PATCHABLE_EVENT_CALL)); @@ -1064,11 +1063,11 @@ bool FastISel::selectXRayTypedEvent(const CallInst *I) { return true; // don't do anything to this instruction. SmallVector<MachineOperand, 8> Ops; Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)), - /*IsDef=*/false)); + /*isDef=*/false)); Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)), - /*IsDef=*/false)); + /*isDef=*/false)); Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)), - /*IsDef=*/false)); + /*isDef=*/false)); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL)); @@ -1205,9 +1204,11 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { if (Arg.IsByVal || Arg.IsInAlloca) { PointerType *Ty = cast<PointerType>(Arg.Ty); Type *ElementTy = Ty->getElementType(); - unsigned FrameSize = DL.getTypeAllocSize(ElementTy); - // For ByVal, alignment should come from FE. BE will guess if this info is - // not there, but there are cases it cannot get right. + unsigned FrameSize = + DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy); + + // For ByVal, alignment should come from FE. BE will guess if this info + // is not there, but there are cases it cannot get right. 
unsigned FrameAlign = Arg.Alignment; if (!FrameAlign) FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); @@ -1235,6 +1236,12 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { if (CLI.NumResultRegs && CLI.CS) updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); + // Set labels for heapallocsite call. + if (CLI.CS && CLI.CS->getInstruction()->getMetadata("heapallocsite")) { + MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); + MF->addCodeViewHeapAllocSite(CLI.Call, MD); + } + return true; } @@ -1304,9 +1311,6 @@ bool FastISel::selectCall(const User *I) { return true; } - MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); - computeUsesVAFloatArgument(*Call, MMI); - // Handle intrinsic function calls. if (const auto *II = dyn_cast<IntrinsicInst>(Call)) return selectIntrinsicCall(II); @@ -1710,14 +1714,11 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB, } /// Emit an FNeg operation. -bool FastISel::selectFNeg(const User *I) { - Value *X; - if (!match(I, m_FNeg(m_Value(X)))) - return false; - unsigned OpReg = getRegForValue(X); +bool FastISel::selectFNeg(const User *I, const Value *In) { + unsigned OpReg = getRegForValue(In); if (!OpReg) return false; - bool OpRegIsKill = hasTrivialKill(I); + bool OpRegIsKill = hasTrivialKill(In); // If the target has ISD::FNEG, use it. EVT VT = TLI.getValueType(DL, I->getType()); @@ -1804,9 +1805,13 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { return selectBinaryOp(I, ISD::FADD); case Instruction::Sub: return selectBinaryOp(I, ISD::SUB); - case Instruction::FSub: + case Instruction::FSub: { // FNeg is currently represented in LLVM IR as a special case of FSub. - return selectFNeg(I) || selectBinaryOp(I, ISD::FSUB); + Value *X; + if (match(I, m_FNeg(m_Value(X)))) + return selectFNeg(I, X); + return selectBinaryOp(I, ISD::FSUB); + } case Instruction::Mul: return selectBinaryOp(I, ISD::MUL); case Instruction::FMul: @@ -1836,6 +1841,9 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { case Instruction::Xor: return selectBinaryOp(I, ISD::XOR); + case Instruction::FNeg: + return selectFNeg(I, I->getOperand(0)); + case Instruction::GetElementPtr: return selectGetElementPtr(I); @@ -1869,6 +1877,13 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { return false; case Instruction::Call: + // On AIX, call lowering uses the DAG-ISEL path currently so that the + // callee of the direct function call instruction will be mapped to the + // symbol for the function's entry point, which is distinct from the + // function descriptor symbol. The latter is the symbol whose XCOFF symbol + // name is the C-linkage name of the source level function. + if (TM.getTargetTriple().isOSAIX()) + return false; return selectCall(I); case Instruction::BitCast: diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index fba728625b07..8b1759246b76 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -1,9 +1,8 @@ //===-- FunctionLoweringInfo.cpp ------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -86,6 +85,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, RegInfo = &MF->getRegInfo(); const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); unsigned StackAlign = TFI->getStackAlignment(); + DA = DAG->getDivergenceAnalysis(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -151,7 +151,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, auto Iter = CatchObjects.find(AI); if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) { FrameIndex = MF->getFrameInfo().CreateFixedObject( - TySize, 0, /*Immutable=*/false, /*isAliased=*/true); + TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true); MF->getFrameInfo().setObjectAlignment(FrameIndex, Align); } else { FrameIndex = @@ -322,13 +322,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, NewMap[MBBMap[Src]] = MBBMap[Dst]; } EHInfo.EHPadUnwindMap = std::move(NewMap); - NewMap.clear(); - for (auto &KV : EHInfo.ThrowUnwindMap) { - const auto *Src = KV.first.get<const BasicBlock *>(); - const auto *Dst = KV.second.get<const BasicBlock *>(); - NewMap[MBBMap[Src]] = MBBMap[Dst]; - } - EHInfo.ThrowUnwindMap = std::move(NewMap); } } @@ -343,6 +336,7 @@ void FunctionLoweringInfo::clear() { LiveOutRegInfo.clear(); VisitedBBs.clear(); ArgDbgValues.clear(); + DescribedArgs.clear(); ByValArgFrameIndexMap.clear(); RegFixups.clear(); RegsWithFixups.clear(); @@ -352,9 +346,9 @@ void FunctionLoweringInfo::clear() { } /// CreateReg - Allocate a single virtual register for the given type. -unsigned FunctionLoweringInfo::CreateReg(MVT VT) { +unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { return RegInfo->createVirtualRegister( - MF->getSubtarget().getTargetLowering()->getRegClassFor(VT)); + MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent)); } /// CreateRegs - Allocate the appropriate number of virtual registers of @@ -364,7 +358,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) { /// In the case that the given value has struct or array type, this function /// will assign registers for each member or element. /// -unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { +unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); SmallVector<EVT, 4> ValueVTs; @@ -377,13 +371,18 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { - unsigned R = CreateReg(RegisterVT); + unsigned R = CreateReg(RegisterVT, isDivergent); if (!FirstReg) FirstReg = R; } } return FirstReg; } +unsigned FunctionLoweringInfo::CreateRegs(const Value *V) { + return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) && + DA->isDivergent(V)); +} + /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the /// register is a PHI destination and the PHI's LiveOutInfo is not valid. 
If /// the register's LiveOutInfo is for a smaller bit width, it is extended to @@ -400,7 +399,7 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { if (BitWidth > LOI->Known.getBitWidth()) { LOI->NumSignBits = 1; - LOI->Known = LOI->Known.zextOrTrunc(BitWidth); + LOI->Known = LOI->Known.zext(BitWidth, false /* => any extend */); } return LOI; @@ -526,56 +525,6 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( return VReg; } -unsigned -FunctionLoweringInfo::getOrCreateSwiftErrorVReg(const MachineBasicBlock *MBB, - const Value *Val) { - auto Key = std::make_pair(MBB, Val); - auto It = SwiftErrorVRegDefMap.find(Key); - // If this is the first use of this swifterror value in this basic block, - // create a new virtual register. - // After we processed all basic blocks we will satisfy this "upwards exposed - // use" by inserting a copy or phi at the beginning of this block. - if (It == SwiftErrorVRegDefMap.end()) { - auto &DL = MF->getDataLayout(); - const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); - auto VReg = MF->getRegInfo().createVirtualRegister(RC); - SwiftErrorVRegDefMap[Key] = VReg; - SwiftErrorVRegUpwardsUse[Key] = VReg; - return VReg; - } else return It->second; -} - -void FunctionLoweringInfo::setCurrentSwiftErrorVReg( - const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) { - SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg; -} - -std::pair<unsigned, bool> -FunctionLoweringInfo::getOrCreateSwiftErrorVRegDefAt(const Instruction *I) { - auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true); - auto It = SwiftErrorVRegDefUses.find(Key); - if (It == SwiftErrorVRegDefUses.end()) { - auto &DL = MF->getDataLayout(); - const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); - unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); - SwiftErrorVRegDefUses[Key] = VReg; - return std::make_pair(VReg, true); - } - return std::make_pair(It->second, false); -} - -std::pair<unsigned, bool> -FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) { - auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false); - auto It = SwiftErrorVRegDefUses.find(Key); - if (It == SwiftErrorVRegDefUses.end()) { - unsigned VReg = getOrCreateSwiftErrorVReg(MBB, Val); - SwiftErrorVRegDefUses[Key] = VReg; - return std::make_pair(VReg, true); - } - return std::make_pair(It->second, false); -} - const Value * FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) { if (VirtReg2Value.empty()) { diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 6a6114677cc2..9bc07d35dfc5 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -1,9 +1,8 @@ //==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -106,7 +105,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // Stick to the preferred register classes for legal types. 
if (TLI->isTypeLegal(VT)) - UseRC = TLI->getRegClassFor(VT); + UseRC = TLI->getRegClassFor(VT, Node->isDivergent()); if (!IsClone && !IsCloned) for (SDNode *User : Node->uses()) { @@ -165,7 +164,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, "Incompatible phys register def and uses!"); DstRC = UseRC; } else { - DstRC = TLI->getRegClassFor(VT); + DstRC = TLI->getRegClassFor(VT, Node->isDivergent()); } // If all uses are reading from the src physical register and copying the @@ -187,24 +186,6 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, assert(isNew && "Node emitted out of order - early"); } -/// getDstOfCopyToRegUse - If the only use of the specified result number of -/// node is a CopyToReg, return its destination register. Return 0 otherwise. -unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, - unsigned ResNo) const { - if (!Node->hasOneUse()) - return 0; - - SDNode *User = *Node->use_begin(); - if (User->getOpcode() == ISD::CopyToReg && - User->getOperand(2).getNode() == Node && - User->getOperand(2).getResNo() == ResNo) { - unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return Reg; - } - return 0; -} - void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, @@ -226,8 +207,9 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, // type correctly. For example, a 64-bit float (X86::FR64) can't live in // the 32-bit float super-class (X86::FR32). if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) { - const TargetRegisterClass *VTRC = - TLI->getRegClassFor(Node->getSimpleValueType(i)); + const TargetRegisterClass *VTRC = TLI->getRegClassFor( + Node->getSimpleValueType(i), + (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC)))); if (RC) VTRC = TRI->getCommonSubClass(RC, VTRC); if (VTRC) @@ -286,14 +268,11 @@ unsigned InstrEmitter::getVR(SDValue Op, if (Op.isMachineOpcode() && Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. - unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); // IMPLICIT_DEF can produce any type of result so its MCInstrDesc // does not include operand register class info. - if (!VReg) { - const TargetRegisterClass *RC = - TLI->getRegClassFor(Op.getSimpleValueType()); - VReg = MRI->createVirtualRegister(RC); - } + const TargetRegisterClass *RC = TLI->getRegClassFor( + Op.getSimpleValueType(), Op.getNode()->isDivergent()); + unsigned VReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; @@ -396,11 +375,15 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { unsigned VReg = R->getReg(); MVT OpVT = Op.getSimpleValueType(); - const TargetRegisterClass *OpRC = - TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr; const TargetRegisterClass *IIRC = II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF)) : nullptr; + const TargetRegisterClass *OpRC = + TLI->isTypeLegal(OpVT) + ? 
TLI->getRegClassFor(OpVT, + Op.getNode()->isDivergent() || + (IIRC && TRI->isDivergentRegClass(IIRC))) + : nullptr; if (OpRC && IIRC && OpRC != IIRC && TargetRegisterInfo::isVirtualRegister(VReg)) { @@ -465,7 +448,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, } unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, - MVT VT, const DebugLoc &DL) { + MVT VT, bool isDivergent, const DebugLoc &DL) { const TargetRegisterClass *VRC = MRI->getRegClass(VReg); const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); @@ -480,7 +463,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual // register instead. - RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx); + RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx); assert(RC && "No legal register class for VT supports that SubIdx"); unsigned NewReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) @@ -515,7 +498,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // classes. unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *TRC = - TLI->getRegClassFor(Node->getSimpleValueType(0)); + TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent()); unsigned Reg; MachineInstr *DefMI; @@ -549,8 +532,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, if (TargetRegisterInfo::isVirtualRegister(Reg)) Reg = ConstrainForSubReg(Reg, SubIdx, Node->getOperand(0).getSimpleValueType(), - Node->getDebugLoc()); - + Node->isDivergent(), Node->getDebugLoc()); // Create the destreg if it is missing. if (VRBase == 0) VRBase = MRI->createVirtualRegister(TRC); @@ -585,7 +567,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // // There is no constraint on the %src register class. // - const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0)); + const TargetRegisterClass *SRC = + TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent()); SRC = TRI->getSubClassWithSubReg(SRC, SubIdx); assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG"); @@ -900,6 +883,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, if (Flags.hasExact()) MI->setFlag(MachineInstr::MIFlag::IsExact); + + if (Flags.hasFPExcept()) + MI->setFlag(MachineInstr::MIFlag::FPExcept); } // Emit all of the actual operands of this instruction, adding them to the @@ -1007,14 +993,23 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case ISD::TokenFactor: // fall thru break; case ISD::CopyToReg: { - unsigned SrcReg; + unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); SDValue SrcVal = Node->getOperand(2); + if (TargetRegisterInfo::isVirtualRegister(DestReg) && + SrcVal.isMachineOpcode() && + SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + // Instead of building a COPY to that vreg destination, build an + // IMPLICIT_DEF instruction. + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); + break; + } + unsigned SrcReg; if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) SrcReg = R->getReg(); else SrcReg = getVR(SrcVal, VRBaseMap); - unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
break; @@ -1049,14 +1044,18 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } - case ISD::INLINEASM: { + case ISD::INLINEASM: + case ISD::INLINEASM_BR: { unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) --NumOps; // Ignore the glue operand. // Create the inline asm machine instruction. - MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetOpcode::INLINEASM)); + unsigned TgtOpc = Node->getOpcode() == ISD::INLINEASM_BR + ? TargetOpcode::INLINEASM_BR + : TargetOpcode::INLINEASM; + MachineInstrBuilder MIB = + BuildMI(*MF, Node->getDebugLoc(), TII->get(TgtOpc)); // Add the asm string as an external symbol operand. SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); @@ -1137,7 +1136,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // then remove the early-clobber flag. for (unsigned Reg : ECRegs) { if (MIB->readsRegister(Reg, TRI)) { - MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI); + MachineOperand *MO = + MIB->findRegisterDefOperand(Reg, false, false, TRI); assert(MO && "No def operand for clobbered register?"); MO->setIsEarlyClobber(false); } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 701b6368690b..cfe99dd977b5 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -1,9 +1,8 @@ //===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -43,11 +42,6 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap); - /// getDstOfCopyToRegUse - If the only use of the specified result number of - /// node is a CopyToReg, return its destination register. Return 0 otherwise. - unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, - unsigned ResNo) const; - void CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, @@ -84,7 +78,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// supports SubIdx sub-registers. Emit a copy if that isn't possible. /// Return the virtual register to use. unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT, - const DebugLoc &DL); + bool isDivergent, const DebugLoc &DL); /// EmitSubregNode - Generate machine code for subreg nodes. /// diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d3aea37f944d..bf817f00f83d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1,9 +1,8 @@ //===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -137,8 +136,6 @@ private: bool &NeedInvert, const SDLoc &dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); - SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, - unsigned NumOps, bool isSigned, const SDLoc &dl); std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); @@ -152,11 +149,17 @@ private: RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); + SDValue ExpandArgFPLibCall(SDNode *Node, + RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl); + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, + const SDLoc &dl, SDValue ChainIn); SDValue ExpandBUILD_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, @@ -489,10 +492,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // If this is an unaligned store and the target doesn't support it, // expand it. EVT MemVT = ST->getMemoryVT(); - unsigned AS = ST->getAddressSpace(); - unsigned Align = ST->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); @@ -542,7 +544,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); - unsigned RoundWidth = 1 << Log2_32(StWidth); + unsigned LogStWidth = Log2_32(StWidth); + assert(LogStWidth < 32); + unsigned RoundWidth = 1 << LogStWidth; assert(RoundWidth < StWidth); unsigned ExtraWidth = StWidth - RoundWidth; assert(ExtraWidth < RoundWidth); @@ -602,11 +606,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { EVT MemVT = ST->getMemoryVT(); - unsigned AS = ST->getAddressSpace(); - unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } @@ -663,13 +666,12 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { EVT MemVT = LD->getMemoryVT(); - unsigned AS = LD->getAddressSpace(); - unsigned Align = LD->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. 
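When allowsMemoryAccess rejects an access, expandUnalignedLoad/Store falls back to byte-granular accesses. The portable C++ analogue of that expansion is a memcpy through a properly aligned temporary (a sketch of the intent, not the DAG mechanics):

#include <cassert>
#include <cstdint>
#include <cstring>

uint32_t loadUnaligned(const unsigned char *P) {
  uint32_t V;
  std::memcpy(&V, P, sizeof(V)); // byte-wise copy is legal at any alignment
  return V;
}

int main() {
  unsigned char Buf[8] = {0, 0xAB, 0xAB, 0xAB, 0xAB, 0, 0, 0};
  assert(loadUnaligned(Buf + 1) == 0xABABABABu); // deliberately misaligned source
  return 0;
}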
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { - std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { + std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); } break; } @@ -756,7 +758,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. assert(!SrcVT.isVector() && "Unsupported extload!"); - unsigned RoundWidth = 1 << Log2_32(SrcWidth); + unsigned LogSrcWidth = Log2_32(SrcWidth); + assert(LogSrcWidth < 32); + unsigned RoundWidth = 1 << LogSrcWidth; assert(RoundWidth < SrcWidth); unsigned ExtraWidth = SrcWidth - RoundWidth; assert(ExtraWidth < RoundWidth); @@ -853,10 +857,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // If this is an unaligned load and the target doesn't support it, // expand it. EVT MemVT = LD->getMemoryVT(); - unsigned AS = LD->getAddressSpace(); - unsigned Align = LD->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG); } } @@ -994,6 +997,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: + case ISD::LROUND: + case ISD::LLROUND: + case ISD::LRINT: + case ISD::LLRINT: Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; @@ -1114,6 +1121,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: // These pseudo-ops get legalized as if they were their non-strict // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does @@ -1128,7 +1137,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; } - case ISD::SMULFIX: { + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -1142,6 +1153,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), cast<MaskedStoreSDNode>(Node)->getValue().getValueType()); break; + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Action = TLI.getOperationAction( + Node->getOpcode(), Node->getOperand(0).getValueType()); + break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -1386,6 +1413,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // Emit a store of each element to the stack slot. 
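EmitStackConvert, which now accepts a caller-supplied chain, implements conversion as a store to a stack slot followed by a reload at the destination type. In scalar C++ the same reinterpretation looks like this (standalone sketch; the expected bit pattern assumes IEEE-754 single precision):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float F = 1.0f;    // the value "stored" to the stack slot
  uint32_t Bits = 0; // the value "reloaded" at the destination type
  static_assert(sizeof F == sizeof Bits, "slot and result sizes must match");
  std::memcpy(&Bits, &F, sizeof Bits);
  assert(Bits == 0x3f800000u); // IEEE-754 encoding of 1.0f
  return 0;
}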
SmallVector<SDValue, 8> Stores; unsigned TypeByteSize = EltVT.getSizeInBits() / 8; + assert(TypeByteSize > 0 && "Vector element type too small for stack store!"); // Store (in the right endianness) the elements to memory. for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { // Ignore undef elements. @@ -1723,6 +1751,12 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, /// The resultant code need not be legal. SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl) { + return EmitStackConvert(SrcOp, SlotVT, DestVT, dl, DAG.getEntryNode()); +} + +SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, + EVT DestVT, const SDLoc &dl, + SDValue Chain) { // Create the stack frame object. unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment( SrcOp.getValueType().getTypeForEVT(*DAG.getContext())); @@ -1743,19 +1777,19 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, // later than DestVT. SDValue Store; - if (SrcSize > SlotSize) - Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, + if (SrcSize > SlotSize) + Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SlotVT, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); Store = - DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign); + DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign); - + assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT, DestAlign); @@ -2049,41 +2083,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return CallInfo.first; } -/// Generate a libcall taking the given operands as arguments -/// and returning a result of type RetVT. -SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, - bool isSigned, const SDLoc &dl) { - TargetLowering::ArgListTy Args; - Args.reserve(NumOps); - - TargetLowering::ArgListEntry Entry; - for (unsigned i = 0; i != NumOps; ++i) { - Entry.Node = Ops[i]; - Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.IsSExt = isSigned; - Entry.IsZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy(DAG.getDataLayout())); - - Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(DAG.getEntryNode()) - .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, - std::move(Args)) - .setSExtResult(isSigned) - .setZExtResult(!isSigned) - .setIsPostTypeLegalization(true); - - std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo.first; -} - // Expand a node into a call to a libcall. Similar to // ExpandLibCall except that the first operand is the in-chain. std::pair<SDValue, SDValue> @@ -2160,6 +2159,27 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } +/// Expand the node to a libcall based on first argument type (for instance +/// lround and its variant). 
+SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128) { + RTLIB::Libcall LC; + switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = Call_F32; break; + case MVT::f64: LC = Call_F64; break; + case MVT::f80: LC = Call_F80; break; + case MVT::f128: LC = Call_F128; break; + case MVT::ppcf128: LC = Call_PPCF128; break; + } + + return ExpandLibCall(LC, Node, false); +} + /// Issue libcalls to __{u}divmod to compute div / rem pairs. void SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, @@ -2530,16 +2550,12 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { // TODO: We can easily support i4/i2 legal types if any target ever does. if (Sz >= 8 && isPowerOf2_32(Sz)) { // Create the masks - repeating the pattern every byte. - APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0); - APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0); - for (unsigned J = 0; J != Sz; J += 8) { - MaskHi4 = MaskHi4 | (0xF0ull << J); - MaskLo4 = MaskLo4 | (0x0Full << J); - MaskHi2 = MaskHi2 | (0xCCull << J); - MaskLo2 = MaskLo2 | (0x33ull << J); - MaskHi1 = MaskHi1 | (0xAAull << J); - MaskLo1 = MaskLo1 | (0x55ull << J); - } + APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0)); + APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC)); + APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA)); + APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F)); + APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33)); + APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55)); // BSWAP if the type is wider than a single byte. Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op); @@ -2593,9 +2609,8 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { switch (VT.getSimpleVT().getScalarType().SimpleTy) { default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: - Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); - return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + // Use a rotate by 8. This can be further expanded if necessary. 
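The BITREVERSE hunk above swaps a hand-rolled mask loop for APInt::getSplat of the byte patterns 0xF0/0xCC/0xAA (and their complements 0x0F/0x33/0x55), and the i16 BSWAP case becomes the rotate returned on the next line. For a concrete 32-bit value the resulting swap network is the classic sequence (standalone sketch, not LLVM code):

    #include <cstdint>

    // Classic O(log n) bit reversal: reverse bytes, then swap nibbles,
    // bit pairs, and single bits using the splatted byte masks.
    uint32_t bitreverse32(uint32_t V) {
      V = __builtin_bswap32(V);                                // bytes
      V = ((V & 0xF0F0F0F0u) >> 4) | ((V & 0x0F0F0F0Fu) << 4); // nibbles
      V = ((V & 0xCCCCCCCCu) >> 2) | ((V & 0x33333333u) << 2); // pairs
      V = ((V & 0xAAAAAAAAu) >> 1) | ((V & 0x55555555u) << 1); // bits
      return V;
    }

    // The i16 BSWAP case in the same hunk is just a rotate by 8:
    uint16_t bswap16(uint16_t V) {
      return static_cast<uint16_t>((V << 8) | (V >> 8));
    }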
+ return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); case MVT::i32: Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); @@ -2799,12 +2814,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; } + case ISD::STRICT_FP_ROUND: + Tmp1 = EmitStackConvert(Node->getOperand(1), + Node->getValueType(0), + Node->getValueType(0), dl, Node->getOperand(0)); + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n"); + return true; case ISD::FP_ROUND: case ISD::BITCAST: - Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), + Tmp1 = EmitStackConvert(Node->getOperand(0), + Node->getValueType(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; + case ISD::STRICT_FP_EXTEND: + Tmp1 = EmitStackConvert(Node->getOperand(1), + Node->getOperand(1).getValueType(), + Node->getValueType(0), dl, Node->getOperand(0)); + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n"); + return true; case ISD::FP_EXTEND: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getOperand(0).getValueType(), @@ -2875,6 +2905,30 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; + case ISD::LROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, + RTLIB::LROUND_F64, RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128)); + break; + case ISD::LLROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128)); + break; + case ISD::LRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, + RTLIB::LRINT_F64, RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); + break; + case ISD::LLRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); + break; case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); @@ -3117,7 +3171,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node. - if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) + if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0), + DAG.getMachineFunction().getFunction().hasOptSize())) Results.push_back(ExpandConstantFP(CFP, true)); break; } @@ -3291,176 +3346,75 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(TLI.expandAddSubSat(Node, DAG)); break; case ISD::SMULFIX: - Results.push_back(TLI.getExpandedFixedPointMultiplication(Node, DAG)); + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + Results.push_back(TLI.expandFixedPointMul(Node, DAG)); break; - case ISD::SADDO: - case ISD::SSUBO: { + case ISD::ADDCARRY: + case ISD::SUBCARRY: { SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); - SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? 
- ISD::ADD : ISD::SUB, dl, LHS.getValueType(), - LHS, RHS); - Results.push_back(Sum); - EVT ResultType = Node->getValueType(1); - EVT OType = getSetCCResultType(Node->getValueType(0)); - - SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); - - // LHSSign -> LHS >= 0 - // RHSSign -> RHS >= 0 - // SumSign -> Sum >= 0 - // - // Add: - // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) - // Sub: - // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) - SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); - SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); - SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, - Node->getOpcode() == ISD::SADDO ? - ISD::SETEQ : ISD::SETNE); - - SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); - SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); - - SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); - Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType)); - break; - } - case ISD::UADDO: - case ISD::USUBO: { - SDValue LHS = Node->getOperand(0); - SDValue RHS = Node->getOperand(1); - bool IsAdd = Node->getOpcode() == ISD::UADDO; - // If ADD/SUBCARRY is legal, use that instead. - unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY; - if (TLI.isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) { - SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1)); - SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(), - { LHS, RHS, CarryIn }); - Results.push_back(SDValue(NodeCarry.getNode(), 0)); - Results.push_back(SDValue(NodeCarry.getNode(), 1)); - break; - } + SDValue Carry = Node->getOperand(2); + + bool IsAdd = Node->getOpcode() == ISD::ADDCARRY; - SDValue Sum = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl, - LHS.getValueType(), LHS, RHS); - Results.push_back(Sum); + // Initial add of the 2 operands. + unsigned Op = IsAdd ? ISD::ADD : ISD::SUB; + EVT VT = LHS.getValueType(); + SDValue Sum = DAG.getNode(Op, dl, VT, LHS, RHS); - EVT ResultType = Node->getValueType(1); + // Initial check for overflow. + EVT CarryType = Node->getValueType(1); EVT SetCCType = getSetCCResultType(Node->getValueType(0)); ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT; - SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); - - Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType)); + SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); + + // Add of the sum and the carry. + SDValue CarryExt = + DAG.getZeroExtendInReg(DAG.getZExtOrTrunc(Carry, dl, VT), dl, MVT::i1); + SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt); + + // Second check for overflow. If we are adding, we can only overflow if the + // initial sum is all 1s and the carry is set, resulting in a new sum of 0. + // If we are subtracting, we can only overflow if the initial sum is 0 and + // the carry is set, resulting in a new sum of all 1s. + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue Overflow2 = + IsAdd ?
DAG.getSetCC(dl, SetCCType, Sum2, Zero, ISD::SETEQ) + : DAG.getSetCC(dl, SetCCType, Sum, Zero, ISD::SETEQ); + Overflow2 = DAG.getNode(ISD::AND, dl, SetCCType, Overflow2, + DAG.getZExtOrTrunc(Carry, dl, SetCCType)); + + SDValue ResultCarry = + DAG.getNode(ISD::OR, dl, SetCCType, Overflow, Overflow2); + + Results.push_back(Sum2); + Results.push_back(DAG.getBoolExtOrTrunc(ResultCarry, dl, CarryType, VT)); + break; + } + case ISD::SADDO: + case ISD::SSUBO: { + SDValue Result, Overflow; + TLI.expandSADDSUBO(Node, Result, Overflow, DAG); + Results.push_back(Result); + Results.push_back(Overflow); + break; + } + case ISD::UADDO: + case ISD::USUBO: { + SDValue Result, Overflow; + TLI.expandUADDSUBO(Node, Result, Overflow, DAG); + Results.push_back(Result); + Results.push_back(Overflow); break; } case ISD::UMULO: case ISD::SMULO: { - EVT VT = Node->getValueType(0); - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); - SDValue LHS = Node->getOperand(0); - SDValue RHS = Node->getOperand(1); - SDValue BottomHalf; - SDValue TopHalf; - static const unsigned Ops[2][3] = - { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND }, - { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }}; - bool isSigned = Node->getOpcode() == ISD::SMULO; - if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) { - BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); - TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS); - } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) { - BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, - RHS); - TopHalf = BottomHalf.getValue(1); - } else if (TLI.isTypeLegal(WideVT)) { - LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); - RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); - Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); - BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, - DAG.getIntPtrConstant(0, dl)); - TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, - DAG.getIntPtrConstant(1, dl)); - } else { - // We can fall back to a libcall with an illegal type for the MUL if we - // have a libcall big enough. - // Also, we can fall back to a division in some cases, but that's a big - // performance hit in the general case. - RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (WideVT == MVT::i16) - LC = RTLIB::MUL_I16; - else if (WideVT == MVT::i32) - LC = RTLIB::MUL_I32; - else if (WideVT == MVT::i64) - LC = RTLIB::MUL_I64; - else if (WideVT == MVT::i128) - LC = RTLIB::MUL_I128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); - - SDValue HiLHS; - SDValue HiRHS; - if (isSigned) { - // The high part is obtained by SRA'ing all but one of the bits of low - // part. - unsigned LoSize = VT.getSizeInBits(); - HiLHS = - DAG.getNode(ISD::SRA, dl, VT, LHS, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy(DAG.getDataLayout()))); - HiRHS = - DAG.getNode(ISD::SRA, dl, VT, RHS, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy(DAG.getDataLayout()))); - } else { - HiLHS = DAG.getConstant(0, dl, VT); - HiRHS = DAG.getConstant(0, dl, VT); - } - - // Here we're passing the 2 arguments explicitly as 4 arguments that are - // pre-lowered to the correct types. This all depends upon WideVT not - // being a legal type for the architecture and thus has to be split to - // two arguments. - SDValue Ret; - if(DAG.getDataLayout().isLittleEndian()) { - // Halves of WideVT are packed into registers in different order - // depending on platform endianness. 
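The ADDCARRY/SUBCARRY expansion introduced at the top of this hunk computes the plain sum, one overflow check, the sum plus the carry, and a second check that can only fire when the first sum was all ones (add) or zero (subtract). A scalar model of that logic for 32-bit lanes (helper name hypothetical):

    #include <cstdint>

    // Full add with carry-in/carry-out, expanded exactly like the
    // ADDCARRY case above: two partial overflow checks OR'ed together.
    uint32_t addcarry32(uint32_t LHS, uint32_t RHS, bool CarryIn,
                        bool &CarryOut) {
      uint32_t Sum = LHS + RHS;
      bool Overflow = Sum < LHS;             // initial check (SETULT)
      uint32_t Sum2 = Sum + (CarryIn ? 1u : 0u);
      // Second check: adding the carry overflows only if Sum was all 1s,
      // i.e. the new sum wrapped to 0 while the carry was set.
      bool Overflow2 = CarryIn && Sum2 == 0;
      CarryOut = Overflow || Overflow2;
      return Sum2;
    }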
This is usually handled by - // the C calling convention, but we can't defer to it in - // the legalizer. - SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; - Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); - } else { - SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; - Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); - } - assert(Ret.getOpcode() == ISD::MERGE_VALUES && - "Ret value is a collection of constituent nodes holding result."); - BottomHalf = Ret.getOperand(0); - TopHalf = Ret.getOperand(1); + SDValue Result, Overflow; + if (TLI.expandMULO(Node, Result, Overflow, DAG)) { + Results.push_back(Result); + Results.push_back(Overflow); } - - if (isSigned) { - Tmp1 = DAG.getConstant( - VT.getSizeInBits() - 1, dl, - TLI.getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout())); - Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); - TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1, - ISD::SETNE); - } else { - TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, - DAG.getConstant(0, dl, VT), ISD::SETNE); - } - - // Truncate the result if SetCC returns a larger type than needed. - EVT RType = Node->getValueType(1); - if (RType.getSizeInBits() < TopHalf.getValueSizeInBits()) - TopHalf = DAG.getNode(ISD::TRUNCATE, dl, RType, TopHalf); - - assert(RType.getSizeInBits() == TopHalf.getValueSizeInBits() && - "Unexpected result type for S/UMULO legalization"); - - Results.push_back(BottomHalf); - Results.push_back(TopHalf); break; } case ISD::BUILD_PAIR: { @@ -3487,6 +3441,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getConstant(0, dl, Tmp1.getValueType()), Tmp2, Tmp3, ISD::SETNE); } + Tmp1->setFlags(Node->getFlags()); Results.push_back(Tmp1); break; case ISD::BR_JT: { @@ -3570,7 +3525,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // condition code, create a new SETCC node. if (Tmp3.getNode()) Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), - Tmp1, Tmp2, Tmp3); + Tmp1, Tmp2, Tmp3, Node->getFlags()); // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. @@ -3598,6 +3553,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getConstant(TrueValue, dl, VT), DAG.getConstant(0, dl, VT), Tmp3); + Tmp1->setFlags(Node->getFlags()); Results.push_back(Tmp1); break; } @@ -3617,9 +3573,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { assert(!TLI.isOperationExpand(ISD::SELECT, VT) && "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be " "expanded."); - EVT CCVT = - TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); - SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC); + EVT CCVT = getSetCCResultType(CmpVT); + SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC, Node->getFlags()); Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4)); break; } @@ -3635,6 +3590,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Use the new condition code and swap true and false Legalized = true; Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC); + Tmp1->setFlags(Node->getFlags()); } else { // If The inverse is not legal, then try to swap the arguments using // the inverse condition code. @@ -3644,6 +3600,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // lhs and rhs. 
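The S/UMULO cases now defer to TLI.expandMULO; the inline code removed above checked the high half of a double-width product. Written out for 32-bit operands, the underlying test is (standalone sketch):

    #include <cstdint>

    // Unsigned: overflow iff the high half of the widened product is nonzero.
    bool umulo32(uint32_t A, uint32_t B, uint32_t &R) {
      uint64_t Wide = static_cast<uint64_t>(A) * B;
      R = static_cast<uint32_t>(Wide);
      return (Wide >> 32) != 0;
    }

    // Signed: overflow iff the high half differs from the sign-extension
    // of the low half (the SRA-by-31 comparison in the removed code).
    bool smulo32(int32_t A, int32_t B, int32_t &R) {
      int64_t Wide = static_cast<int64_t>(A) * B;
      R = static_cast<int32_t>(Wide);
      return Wide != static_cast<int64_t>(R);
    }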
Legalized = true; Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC); + Tmp1->setFlags(Node->getFlags()); } } @@ -3670,6 +3627,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); } + Tmp1->setFlags(Node->getFlags()); } Results.push_back(Tmp1); break; @@ -3729,6 +3687,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { ReplaceNode(SDValue(Node, 0), Result); break; } + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Results.push_back(TLI.expandVecReduce(Node, DAG)); + break; case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -4273,6 +4246,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); // Perform the larger operation, then round down. Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3); + Tmp1->setFlags(Node->getFlags()); if (TruncOp != ISD::FP_ROUND) Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); else @@ -4303,8 +4277,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); - Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), - Tmp1, Tmp2, Node->getOperand(2))); + Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1, + Tmp2, Node->getOperand(2), Node->getFlags())); break; } case ISD::BR_CC: { @@ -4532,6 +4506,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(CvtVec); break; } + case ISD::ATOMIC_SWAP: { + AtomicSDNode *AM = cast<AtomicSDNode>(Node); + SDLoc SL(Node); + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal()); + assert(NVT.getSizeInBits() == OVT.getSizeInBits() && + "unexpected promotion type"); + assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() && + "unexpected atomic_swap with illegal type"); + + SDValue NewAtomic + = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT, + DAG.getVTList(NVT, MVT::Other), + { AM->getChain(), AM->getBasePtr(), CastVal }, + AM->getMemOperand()); + Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic)); + Results.push_back(NewAtomic.getValue(1)); + break; + } } // Replace the original node with the legalized result. diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 4644e9588e7b..b4849b2881e6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1,9 +1,8 @@ //===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -104,6 +103,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; case ISD::SINT_TO_FP: @@ -440,6 +440,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); + + EVT FloatVT = N->getValueType(ResNo); + if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) { + // Expand Y = FNEG(X) -> Y = X ^ sign mask + APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); + return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)), + DAG.getConstant(SignMask, dl, NVT)); + } + // Expand Y = FNEG(X) -> Y = SUB -0.0, X SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; @@ -763,6 +772,10 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break; + case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break; + case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break; + case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break; case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; @@ -1029,6 +1042,61 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { ST->getMemOperand()); } +SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LROUND_F32, + RTLIB::LROUND_F64, + RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, + RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LRINT_F32, + RTLIB::LRINT_F64, + RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { + EVT NVT = 
TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, + RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} //===----------------------------------------------------------------------===// // Float Result Expansion @@ -1562,6 +1630,10 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break; + case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break; + case ISD::LRINT: Res = ExpandFloatOp_LRINT(N); break; + case ISD::LLRINT: Res = ExpandFloatOp_LLRINT(N); break; case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), @@ -1732,6 +1804,54 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { ST->getMemoryVT(), ST->getMemOperand()); } +SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { + EVT RVT = N->getValueType(0); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LROUND_F32, + RTLIB::LROUND_F64, + RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128), + RVT, N->getOperand(0), false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { + EVT RVT = N->getValueType(0); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, + RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128), + RVT, N->getOperand(0), false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { + EVT RVT = N->getValueType(0); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LRINT_F32, + RTLIB::LRINT_F64, + RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128), + RVT, N->getOperand(0), false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) { + EVT RVT = N->getValueType(0); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, + RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128), + RVT, N->getOperand(0), false, SDLoc(N)).first; +} + //===----------------------------------------------------------------------===// // Float Operand Promotion //===----------------------------------------------------------------------===// @@ -1748,6 +1868,8 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) { } bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { + LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue R = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { @@ -1762,6 +1884,10 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { // a part of PromoteFloatResult. 
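Stepping back to the SoftenFloatRes_FNEG change earlier in this file's diff: negating a softened float becomes an integer XOR with the sign mask instead of a subtraction from -0.0, which also preserves the signs of zeros and NaNs. A standalone f32 sketch, assuming IEEE-754 binary32 (helper name hypothetical):

    #include <cstdint>
    #include <cstring>

    // Soft-float negation: flip the sign bit of the bit pattern, which is
    // what the new "Y = X ^ sign mask" expansion emits.
    float fnegSoft(float X) {
      uint32_t Bits;
      std::memcpy(&Bits, &X, sizeof(Bits));
      Bits ^= UINT32_C(0x80000000);        // APInt::getSignMask(32)
      std::memcpy(&X, &Bits, sizeof(Bits));
      return X;
    }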
switch (N->getOpcode()) { default: + #ifndef NDEBUG + dbgs() << "PromoteFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; + #endif llvm_unreachable("Do not know how to promote this operator's operand!"); case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break; @@ -1872,6 +1998,8 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) { //===----------------------------------------------------------------------===// void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { + LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { @@ -1880,6 +2008,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FP16_TO_FP: case ISD::FP_TO_FP16: default: +#ifndef NDEBUG + dbgs() << "PromoteFloatResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif llvm_unreachable("Do not know how to promote this operator's result!"); case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break; @@ -1932,7 +2064,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break; - + case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; } if (R.getNode()) @@ -2166,3 +2298,29 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { N->getValueType(0))); } +SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + + AtomicSDNode *AM = cast<AtomicSDNode>(N); + SDLoc SL(N); + + SDValue CastVal = BitConvertToInteger(AM->getVal()); + EVT CastVT = CastVal.getValueType(); + + SDValue NewAtomic + = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, CastVT, + DAG.getVTList(CastVT, MVT::Other), + { AM->getChain(), AM->getBasePtr(), CastVal }, + AM->getMemOperand()); + + SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT, + NewAtomic); + // Legalize the chain result by replacing uses of the old value chain with the + // new one + ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1)); + + return ResultCast; + +} + diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 5fbc70fce60d..15ac45c37c66 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1,9 +1,8 @@ //===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -149,7 +148,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break; - case ISD::SMULFIX: Res = PromoteIntRes_SMULFIX(N); break; + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break; + case ISD::ABS: Res = PromoteIntRes_ABS(N); break; case ISD::ATOMIC_LOAD: Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break; @@ -172,6 +174,18 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo); break; + + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + Res = PromoteIntRes_VECREDUCE(N); + break; } // If the result is null then the sub-method took care of registering it. @@ -293,21 +307,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { BitConvertToInteger(GetScalarizedVector(InOp))); break; case TargetLowering::TypeSplitVector: { - // For example, i32 = BITCAST v2i16 on alpha. Convert the split - // pieces of the input into integers and reassemble in the final type. - SDValue Lo, Hi; - GetSplitVector(N->getOperand(0), Lo, Hi); - Lo = BitConvertToInteger(Lo); - Hi = BitConvertToInteger(Hi); - - if (DAG.getDataLayout().isBigEndian()) - std::swap(Lo, Hi); - - InOp = DAG.getNode(ISD::ANY_EXTEND, dl, - EVT::getIntegerVT(*DAG.getContext(), - NOutVT.getSizeInBits()), - JoinIntegers(Lo, Hi)); - return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); + if (!NOutVT.isVector()) { + // For example, i32 = BITCAST v2i16 on alpha. Convert the split + // pieces of the input into integers and reassemble in the final type. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + InOp = DAG.getNode(ISD::ANY_EXTEND, dl, + EVT::getIntegerVT(*DAG.getContext(), + NOutVT.getSizeInBits()), + JoinIntegers(Lo, Hi)); + return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); + } + break; } case TargetLowering::TypeWidenVector: // The input is widened to the same size. Convert to the widened value. @@ -555,7 +572,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), N->getMask(), ExtPassThru, N->getMemoryVT(), - N->getMemOperand(), ISD::SEXTLOAD); + N->getMemOperand(), ISD::EXTLOAD); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -582,23 +599,27 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { /// Promote the overflow flag of an overflowing arithmetic node. SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { - // Simply change the return type of the boolean result. + // Change the return type of the boolean result while obeying + // getSetCCResultType. 
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); - EVT ValueVTs[] = { N->getValueType(0), NVT }; + EVT VT = N->getValueType(0); + EVT SVT = getSetCCResultType(VT); SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) }; unsigned NumOps = N->getNumOperands(); assert(NumOps <= 3 && "Too many operands"); if (NumOps == 3) Ops[2] = N->getOperand(2); - SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), - DAG.getVTList(ValueVTs), makeArrayRef(Ops, NumOps)); + SDLoc dl(N); + SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT), + makeArrayRef(Ops, NumOps)); // Modified the sum result - switch anything that used the old sum to use // the new one. ReplaceValueWith(SDValue(N, 0), Res); - return SDValue(Res.getNode(), 1); + // Convert to the expected type. + return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT); } SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { @@ -646,12 +667,39 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); } -SDValue DAGTypeLegalizer::PromoteIntRes_SMULFIX(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { // Can just promote the operands then continue with operation. SDLoc dl(N); - SDValue Op1Promoted = SExtPromotedInteger(N->getOperand(0)); - SDValue Op2Promoted = SExtPromotedInteger(N->getOperand(1)); + SDValue Op1Promoted, Op2Promoted; + bool Signed = + N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT; + if (Signed) { + Op1Promoted = SExtPromotedInteger(N->getOperand(0)); + Op2Promoted = SExtPromotedInteger(N->getOperand(1)); + } else { + Op1Promoted = ZExtPromotedInteger(N->getOperand(0)); + Op2Promoted = ZExtPromotedInteger(N->getOperand(1)); + } + EVT OldType = N->getOperand(0).getValueType(); EVT PromotedType = Op1Promoted.getValueType(); + unsigned DiffSize = + PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits(); + + bool Saturating = N->getOpcode() == ISD::SMULFIXSAT; + if (Saturating) { + // Promoting the operand and result values changes the saturation width, + // which extends the values that we clamp to on saturation. This could be + // resolved by shifting one of the operands the same amount, which would + // also shift the result we compare against, then shifting back. + EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, + DAG.getConstant(DiffSize, dl, ShiftTy)); + SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, + Op2Promoted, N->getOperand(2)); + unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL; + return DAG.getNode(ShiftOp, dl, PromotedType, Result, + DAG.getConstant(DiffSize, dl, ShiftTy)); + } return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted, N->getOperand(2)); } @@ -875,7 +923,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { // Calculate the overflow flag: zero extend the arithmetic result from // the original type. - SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT); + SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT.getScalarType()); // Overflowed if and only if this is not equal to Res.
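That comparison is the whole promoted overflow check: do the arithmetic in the wider type, zero-extend the result back in the original width, and flag overflow when the two differ. In scalar form, for a hypothetical i8 uaddo promoted to i32:

    #include <cstdint>

    // Promote an i8 uaddo to i32: do the add in the wide type, then the
    // overflow flag is "low 8 bits, zero-extended, != wide result".
    uint8_t uaddo8(uint8_t LHS, uint8_t RHS, bool &Overflow) {
      uint32_t Res = static_cast<uint32_t>(LHS) + RHS; // promoted add
      uint32_t Ofl = Res & 0xFFu;                      // getZeroExtendInReg
      Overflow = Ofl != Res;                           // SETNE
      return static_cast<uint8_t>(Res);
    }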
Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); @@ -917,6 +965,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) { return SDValue(Res.getNode(), 0); } +SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) { + SDValue Op0 = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0); +} + SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { // Promote the overflow bit trivially. if (ResNo == 1) @@ -946,9 +999,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { SDValue Overflow; if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred if the high part is non-zero. + unsigned Shift = SmallVT.getScalarSizeInBits(); + EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(), + TLI, DAG); SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, - DAG.getIntPtrConstant(SmallVT.getSizeInBits(), - DL)); + DAG.getConstant(Shift, DL, ShiftTy)); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, DAG.getConstant(0, DL, Hi.getValueType()), ISD::SETNE); @@ -1091,7 +1146,21 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break; - case ISD::SMULFIX: Res = PromoteIntOp_SMULFIX(N); break; + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break; + + case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; + + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break; } // If the result is null, the sub-method took care of registering results etc. 
@@ -1434,24 +1503,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { SDValue Carry = N->getOperand(2); SDLoc DL(N); - auto VT = getSetCCResultType(LHS.getValueType()); - TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(VT); - switch (BoolType) { - case TargetLoweringBase::UndefinedBooleanContent: - Carry = DAG.getAnyExtOrTrunc(Carry, DL, VT); - break; - case TargetLoweringBase::ZeroOrOneBooleanContent: - Carry = DAG.getZExtOrTrunc(Carry, DL, VT); - break; - case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: - Carry = DAG.getSExtOrTrunc(Carry, DL, VT); - break; - } + Carry = PromoteTargetBoolean(Carry, LHS.getValueType()); return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_SMULFIX(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntOp_MULFIX(SDNode *N) { SDValue Op2 = ZExtPromotedInteger(N->getOperand(2)); return SDValue( DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0); @@ -1475,6 +1532,44 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) { 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) { + SDValue Op = SExtPromotedInteger(N->getOperand(1)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { + SDLoc dl(N); + SDValue Op; + switch (N->getOpcode()) { + default: llvm_unreachable("Expected integer vector reduction"); + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + Op = GetPromotedInteger(N->getOperand(0)); + break; + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + Op = SExtPromotedInteger(N->getOperand(0)); + break; + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + Op = ZExtPromotedInteger(N->getOperand(0)); + break; + } + + EVT EltVT = Op.getValueType().getVectorElementType(); + EVT VT = N->getValueType(0); + if (VT.bitsGE(EltVT)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op); + + // Result size must be >= element size. If this is not the case after + // promotion, also promote the result type and then truncate. 
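Before the truncate fallback that follows, a scalar illustration of why the extension choice in PromoteIntOp_VECREDUCE matters: signed min/max reductions must sign-extend the promoted lanes so ordering survives, while unsigned ones zero-extend. A sketch for i8 lanes promoted to i32 (helper hypothetical, non-empty input assumed):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // VECREDUCE_SMAX over i8 lanes after promotion to i32: sign-extend so
    // the signed order is preserved; a UMAX reduction would zero-extend.
    int8_t vecreduce_smax(const std::vector<int8_t> &V) {
      int32_t Acc = static_cast<int32_t>(V.front()); // non-empty by assumption
      for (int8_t Lane : V)                          // SExtPromotedInteger
        Acc = std::max(Acc, static_cast<int32_t>(Lane));
      return static_cast<int8_t>(Acc);               // truncate back
    }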
+ SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce); +} + //===----------------------------------------------------------------------===// // Integer Result Expansion //===----------------------------------------------------------------------===// @@ -1499,7 +1594,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { dbgs() << "ExpandIntegerResult #" << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to expand the result of this operator!"); + report_fatal_error("Do not know how to expand the result of this " + "operator!"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; @@ -1518,6 +1614,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break; case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; @@ -1526,6 +1623,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break; case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; + case ISD::LLROUND: ExpandIntRes_LLROUND(N, Lo, Hi); break; + case ISD::LLRINT: ExpandIntRes_LLRINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; @@ -1613,7 +1712,20 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break; - case ISD::SMULFIX: ExpandIntRes_SMULFIX(N, Lo, Hi); break; + + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break; + + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -2267,6 +2379,25 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, IsOpaque); } +void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + + // abs(HiLo) -> (Hi < 0 ? 
-HiLo : HiLo) + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, + DAG.getConstant(0, dl, VT), N0); + SDValue NegLo, NegHi; + SplitInteger(Neg, NegLo, NegHi); + + GetExpandedInteger(N0, Lo, Hi); + EVT NVT = Lo.getValueType(); + SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), + DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT); + Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo); + Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -2361,6 +2492,58 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo, + SDValue &Hi) { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + if (VT == MVT::f32) + LC = RTLIB::LLROUND_F32; + else if (VT == MVT::f64) + LC = RTLIB::LLROUND_F64; + else if (VT == MVT::f80) + LC = RTLIB::LLROUND_F80; + else if (VT == MVT::f128) + LC = RTLIB::LLROUND_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LLROUND_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!"); + + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + + SDLoc dl(N); + EVT RetVT = N->getValueType(0); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first, + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + if (VT == MVT::f32) + LC = RTLIB::LLRINT_F32; + else if (VT == MVT::f64) + LC = RTLIB::LLRINT_F64; + else if (VT == MVT::f80) + LC = RTLIB::LLRINT_F80; + else if (VT == MVT::f128) + LC = RTLIB::LLRINT_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LLRINT_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!"); + + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + + SDLoc dl(N); + EVT RetVT = N->getValueType(0); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first, + Lo, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi) { if (ISD::isNormalLoad(N)) { @@ -2581,15 +2764,39 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo, SplitInteger(Result, Lo, Hi); } -void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo, - SDValue &Hi) { +/// This performs an expansion of the integer result for a fixed point +/// multiplication. The default expansion performs rounding down towards +/// negative infinity, though targets that do care about rounding should specify +/// a target hook for rounding and provide their own expansion or lowering of +/// fixed point multiplication to be consistent with rounding. 
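As a scalar reference point for the implementation that follows: when a double-width type is available, a fixed-point multiply is a widened multiply plus an arithmetic shift by the scale, with clamping for the saturating form. A sketch for 32-bit Q-format values, assuming Scale < 32 (helper names hypothetical):

    #include <cstdint>
    #include <limits>

    // smulfix: multiply two fixed-point numbers with 'Scale' fractional
    // bits, rounding down towards negative infinity (arithmetic shift).
    int32_t smulfix(int32_t A, int32_t B, unsigned Scale) {
      int64_t Prod = static_cast<int64_t>(A) * B;
      return static_cast<int32_t>(Prod >> Scale);
    }

    // smulfixsat: same, but clamp when the shifted product leaves i32.
    int32_t smulfixsat(int32_t A, int32_t B, unsigned Scale) {
      int64_t Shifted = (static_cast<int64_t>(A) * B) >> Scale;
      if (Shifted > std::numeric_limits<int32_t>::max())
        return std::numeric_limits<int32_t>::max();
      if (Shifted < std::numeric_limits<int32_t>::min())
        return std::numeric_limits<int32_t>::min();
      return static_cast<int32_t>(Shifted);
    }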
+void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, + SDValue &Hi) { SDLoc dl(N); EVT VT = N->getValueType(0); + unsigned VTSize = VT.getScalarSizeInBits(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); uint64_t Scale = N->getConstantOperandVal(2); + bool Saturating = N->getOpcode() == ISD::SMULFIXSAT; + EVT BoolVT = getSetCCResultType(VT); + SDValue Zero = DAG.getConstant(0, dl, VT); if (!Scale) { - SDValue Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + SDValue Result; + if (!Saturating) { + Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + } else { + Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); + SDValue Product = Result.getValue(0); + SDValue Overflow = Result.getValue(1); + + APInt MinVal = APInt::getSignedMinValue(VTSize); + APInt MaxVal = APInt::getSignedMaxValue(VTSize); + SDValue SatMin = DAG.getConstant(MinVal, dl, VT); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + Result = DAG.getSelect(dl, VT, Overflow, Result, Product); + } SplitInteger(Result, Lo, Hi); return; } @@ -2600,15 +2807,19 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo, GetExpandedInteger(RHS, RL, RH); SmallVector<SDValue, 4> Result; - if (!TLI.expandMUL_LOHI(ISD::SMUL_LOHI, VT, dl, LHS, RHS, Result, NVT, DAG, + bool Signed = (N->getOpcode() == ISD::SMULFIX || + N->getOpcode() == ISD::SMULFIXSAT); + unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI; + if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG, TargetLowering::MulExpansionKind::OnlyLegalOrCustom, LL, LH, RL, RH)) { - report_fatal_error("Unable to expand SMUL_FIX using SMUL_LOHI."); + report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI."); return; } - unsigned VTSize = VT.getScalarSizeInBits(); unsigned NVTSize = NVT.getScalarSizeInBits(); + assert((VTSize == NVTSize * 2) && "Expected the new value type to be half " + "the size of the current value type"); EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); // Shift whole amount by scale. @@ -2617,6 +2828,11 @@ SDValue ResultLL = Result[0]; SDValue ResultLH = Result[1]; SDValue ResultHL = Result[2]; SDValue ResultHH = Result[3]; + SDValue SatMax, SatMin; + SDValue NVTZero = DAG.getConstant(0, dl, NVT); + SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT); + EVT BoolNVT = getSetCCResultType(NVT); + // After getting the multiplication result in 4 parts, we need to perform a // shift right by the amount of the scale to get the result in that scale. // Let's say we multiply 2 64 bit numbers. The resulting value can be held in @@ -2645,11 +2861,60 @@ Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); + + // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the + // highest bit of HH determines saturation direction in the event of + // saturation. + // The number of overflow bits we can check is VTSize - Scale + 1 (we + // include the sign bit). If these top bits are > 0, then we overflowed past + // the max value. If these top bits are < -1, then we overflowed past the + // min value. Otherwise, we did not overflow.
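In scalar terms, the top-bits test described in that comment shifts the double-width product past the result's sign bit and checks for all zeros or all ones. A sketch for a 32-bit result, i.e. a 64-bit product (helper hypothetical, Scale < 32 assumed):

    #include <cstdint>

    // Saturation test from the comment above: the top (32 - Scale + 1)
    // bits of the 64-bit product must be all 0s or all 1s, i.e. the
    // product arithmetically shifted past the result must be 0 or -1.
    bool smulfixOverflows(int32_t A, int32_t B, unsigned Scale,
                          bool &SatMax) {
      int64_t Prod = static_cast<int64_t>(A) * B;
      int64_t Top = Prod >> (Scale + 31);  // result sign bit and above
      SatMax = Top > 0;                    // overflowed past the max
      return Top != 0 && Top != -1;        // any overflow at all
    }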
+ if (Saturating) { + unsigned OverflowBits = VTSize - Scale + 1; + assert(OverflowBits <= VTSize && OverflowBits > NVTSize && + "Extent of overflow bits must start within HL"); + SDValue HLHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT); + SDValue HLLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT); + + // HH > 0 or HH == 0 && HL > HLLoMask + SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); + SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); + SDValue HLPos = + DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT); + SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos, + DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos)); + + // HH < -1 or HH == -1 && HL < HLHiMask + SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLNeg = + DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg, + DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg)); + } } else if (Scale == NVTSize) { // If the scales are equal, Lo and Hi are ResultLH and ResultHL, // respectively. Avoid shifting to prevent undefined behavior. Lo = ResultLH; Hi = ResultHL; + + // We overflow max if HH > 0 or HH == 0 && HL sign bit is 1. + // We overflow min if HH < -1 or HH == -1 && HL sign bit is 0. + if (Saturating) { + SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); + SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); + SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT); + SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos, + DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg)); + + SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg, + DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos)); + } } else if (Scale < VTSize) { // If the scale is instead less than the old VT size, but greater than or // equal to the expanded VT size, the first part of the result (ResultLL) is @@ -2664,9 +2929,39 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt); Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt)); + + // This is similar to the case when we saturate if Scale < NVTSize, but we + // only need to check HH.
+ if (Saturating) { + unsigned OverflowBits = VTSize - Scale + 1; + SDValue HHHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT); + SDValue HHLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT); + + SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT); + SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT); + } + } else if (Scale == VTSize) { + assert( + !Signed && + "Only unsigned types can have a scale equal to the operand bit width"); + + Lo = ResultHL; + Hi = ResultHH; } else { - llvm_unreachable( - "Expected the scale to be less than the width of the operands"); + llvm_unreachable("Expected the scale to be less than or equal to the width " + "of the operands"); + } + + if (Saturating) { + APInt LHMax = APInt::getSignedMaxValue(NVTSize); + APInt LLMax = APInt::getAllOnesValue(NVTSize); + APInt LHMin = APInt::getSignedMinValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi); + Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo); + Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); } } @@ -2765,11 +3060,15 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, } // Next check to see if the target supports this SHL_PARTS operation or if it - // will custom expand it. + // will custom expand it. Don't lower this to SHL_PARTS when we optimise for + // size, but create a libcall instead. EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT); - if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || - Action == TargetLowering::Custom) { + const bool LegalOrCustom = + (Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || + Action == TargetLowering::Custom; + + if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) { // Expand the subcomponents. SDValue LHSL, LHSH; GetExpandedInteger(N->getOperand(0), LHSL, LHSH); @@ -3145,6 +3444,14 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); } +void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // TODO For VECREDUCE_(AND|OR|XOR) we could split the vector and calculate + // both halves independently. 
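TLI.expandVecReduce, called on the next line, typically lowers a reduction as a log2-depth tree: split the vector in halves, combine them with the scalar operation, and repeat. A scalar sketch of that halving strategy, assuming a non-empty power-of-two lane count:

    #include <cstddef>
    #include <vector>

    // Tree reduction: repeatedly fold the upper half onto the lower half,
    // the shape expandVecReduce produces with extract_subvector + op.
    int vecreduce_add(std::vector<int> V) {   // size must be a power of 2
      for (size_t N = V.size() / 2; N >= 1; N /= 2)
        for (size_t i = 0; i != N; ++i)
          V[i] += V[i + N];
      return V[0];
    }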
+ SDValue Res = TLI.expandVecReduce(N, DAG); + SplitInteger(Res, Lo, Hi); +} + //===----------------------------------------------------------------------===// // Integer Operand Expansion //===----------------------------------------------------------------------===// @@ -3167,7 +3474,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to expand this operator's operand!"); + report_fatal_error("Do not know how to expand this operator's operand!"); case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; @@ -3632,8 +3939,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { - SDValue InOp0 = N->getOperand(0); - EVT InVT = InOp0.getValueType(); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); @@ -3644,6 +3949,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDLoc dl(N); SDValue BaseIdx = N->getOperand(1); + SDValue InOp0 = N->getOperand(0); + if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger) + InOp0 = GetPromotedInteger(N->getOperand(0)); + + EVT InVT = InOp0.getValueType(); + SmallVector<SDValue, 8> Ops; Ops.reserve(OutNumElems); for (unsigned i = 0; i != OutNumElems; ++i) { @@ -3654,7 +3965,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getVectorElementType(), N->getOperand(0), Index); - SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext); + SDValue Op = DAG.getAnyExtOrTrunc(Ext, dl, NOutVTElem); // Insert the converted element to the new vector. Ops.push_back(Op); } @@ -3809,6 +4120,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { V0, ConvElem, N->getOperand(2)); } +SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) { + // The VECREDUCE result size may be larger than the element size, so + // we can simply change the result type. + SDLoc dl(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a9f144c06e9a..14fd5be23ccb 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1,9 +1,8 @@ //===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -708,6 +707,7 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { auto &OpIdEntry = PromotedIntegers[getTableId(Op)]; assert((OpIdEntry == 0) && "Node is already promoted!"); OpIdEntry = getTableId(Result); + Result->setFlags(Op->getFlags()); DAG.transferDbgValues(Op, Result); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 032000f6cb79..1d489b1b3a33 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1,9 +1,8 @@ //===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -345,8 +344,10 @@ private: SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_ADDSUBSAT(SDNode *N); - SDValue PromoteIntRes_SMULFIX(SDNode *N); + SDValue PromoteIntRes_MULFIX(SDNode *N); SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); + SDValue PromoteIntRes_VECREDUCE(SDNode *N); + SDValue PromoteIntRes_ABS(SDNode *N); // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); @@ -379,7 +380,9 @@ private: SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N); SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo); - SDValue PromoteIntOp_SMULFIX(SDNode *N); + SDValue PromoteIntOp_MULFIX(SDNode *N); + SDValue PromoteIntOp_FPOWI(SDNode *N); + SDValue PromoteIntOp_VECREDUCE(SDNode *N); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -402,6 +405,7 @@ private: void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ABS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -414,6 +418,8 @@ private: void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LLROUND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -435,9 +441,10 @@ private: void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_SMULFIX (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void 
ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, const APInt &Amt, SDValue &Lo, SDValue &Hi); @@ -548,6 +555,10 @@ private: SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); + SDValue SoftenFloatOp_LROUND(SDNode *N); + SDValue SoftenFloatOp_LLROUND(SDNode *N); + SDValue SoftenFloatOp_LRINT(SDNode *N); + SDValue SoftenFloatOp_LLRINT(SDNode *N); SDValue SoftenFloatOp_SELECT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); @@ -607,6 +618,10 @@ private: SDValue ExpandFloatOp_FP_ROUND(SDNode *N); SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); + SDValue ExpandFloatOp_LROUND(SDNode *N); + SDValue ExpandFloatOp_LLROUND(SDNode *N); + SDValue ExpandFloatOp_LRINT(SDNode *N); + SDValue ExpandFloatOp_LLRINT(SDNode *N); SDValue ExpandFloatOp_SELECT_CC(SDNode *N); SDValue ExpandFloatOp_SETCC(SDNode *N); SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -640,6 +655,7 @@ private: SDValue PromoteFloatRes_SELECT_CC(SDNode *N); SDValue PromoteFloatRes_UnaryOp(SDNode *N); SDValue PromoteFloatRes_UNDEF(SDNode *N); + SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N); SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); bool PromoteFloatOperand(SDNode *N, unsigned OpNo); @@ -673,6 +689,7 @@ private: SDValue ScalarizeVecRes_TernaryOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_StrictFPOp(SDNode *N); + SDValue ScalarizeVecRes_OverflowOp(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_InregOp(SDNode *N); SDValue ScalarizeVecRes_VecInregOp(SDNode *N); @@ -680,6 +697,7 @@ private: SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); + SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); @@ -691,7 +709,7 @@ private: SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); - SDValue ScalarizeVecRes_SMULFIX(SDNode *N); + SDValue ScalarizeVecRes_MULFIX(SDNode *N); // Vector Operand Scalarization: <1 x ty> -> ty. 
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); @@ -703,6 +721,8 @@ private: SDValue ScalarizeVecOp_VSETCC(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_VECREDUCE(SDNode *N); //===--------------------------------------------------------------------===// // Vector Splitting Support: LegalizeVectorTypes.cpp @@ -727,8 +747,10 @@ private: void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi); - void SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -745,6 +767,7 @@ private: void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi); // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. bool SplitVectorOperand(SDNode *N, unsigned OpNo); @@ -808,7 +831,9 @@ private: SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_StrictFP(SDNode *N); + SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo); SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_Convert_StrictFP(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); @@ -827,9 +852,16 @@ private: SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); + SDValue WidenVecOp_VSELECT(SDNode *N); SDValue WidenVecOp_Convert(SDNode *N); SDValue WidenVecOp_FCOPYSIGN(SDNode *N); + SDValue WidenVecOp_VECREDUCE(SDNode *N); + + /// Helper function to generate a set of operations to perform + /// a vector operation for a wider type. + /// + SDValue UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE); //===--------------------------------------------------------------------===// // Vector Widening Utilities Support: LegalizeVectorTypes.cpp diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index b9d370441c3e..943f63f46c47 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -1,9 +1,8 @@ //===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 4923a529c21b..10b8b705869e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1,9 +1,8 @@ //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -141,7 +140,11 @@ class VectorLegalizer { SDValue ExpandFunnelShift(SDValue Op); SDValue ExpandROT(SDValue Op); SDValue ExpandFMINNUM_FMAXNUM(SDValue Op); + SDValue ExpandUADDSUBO(SDValue Op); + SDValue ExpandSADDSUBO(SDValue Op); + SDValue ExpandMULO(SDValue Op); SDValue ExpandAddSubSat(SDValue Op); + SDValue ExpandFixedPointMul(SDValue Op); SDValue ExpandStrictFPOp(SDValue Op); /// Implements vector promotion. @@ -263,7 +266,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { LLVM_FALLTHROUGH; case TargetLowering::Expand: Changed = true; - return LegalizeOp(ExpandLoad(Op)); + return ExpandLoad(Op); } } } else if (Op.getOpcode() == ISD::STORE) { @@ -288,17 +291,18 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { } case TargetLowering::Expand: Changed = true; - return LegalizeOp(ExpandStore(Op)); + return ExpandStore(Op); } } } - bool HasVectorValue = false; - for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); - J != E; - ++J) - HasVectorValue |= J->isVector(); - if (!HasVectorValue) + bool HasVectorValueOrOp = false; + for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J) + HasVectorValueOrOp |= J->isVector(); + for (const SDValue &Op : Node->op_values()) + HasVectorValueOrOp |= Op.getValueType().isVector(); + + if (!HasVectorValueOrOp) return TranslateLegalizeResults(Op, Result); TargetLowering::LegalizeAction Action = TargetLowering::Legal; @@ -329,6 +333,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: // These pseudo-ops get legalized as if they were their non-strict // equivalent. 
For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does @@ -418,6 +424,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UMAX: case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: case ISD::FCANONICALIZE: case ISD::SADDSAT: case ISD::UADDSAT: @@ -425,7 +437,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::USUBSAT: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; - case ISD::SMULFIX: { + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -437,6 +451,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; @@ -650,23 +677,21 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { LoadChains.push_back(ScalarLoad.getValue(1)); } - // Extract bits, pack and extend/trunc them into destination type. - unsigned SrcEltBits = SrcEltVT.getSizeInBits(); - SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT); - unsigned BitOffset = 0; unsigned WideIdx = 0; unsigned WideBits = WideVT.getSizeInBits(); + // Extract bits, pack and extend/trunc them into destination type. 
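+ // Building the mask with APInt::getLowBitsSet keeps it correct for any + // element width; the previous (1U << SrcEltBits) - 1 form was undefined + // once SrcEltBits reached 32.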
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits(); + SDValue SrcEltBitMask = DAG.getConstant( + APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT); + for (unsigned Idx = 0; Idx != NumElem; ++Idx) { - SDValue Lo, Hi, ShAmt; + assert(BitOffset < WideBits && "Unexpected offset!"); - if (BitOffset < WideBits) { - ShAmt = DAG.getConstant( - BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); - Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); - Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); - } + SDValue ShAmt = DAG.getConstant( + BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); + SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); BitOffset += SrcEltBits; if (BitOffset >= WideBits) { @@ -676,13 +701,13 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ShAmt = DAG.getConstant( SrcEltBits - BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); - Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); - Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); + SDValue Hi = + DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); + Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); } } - if (Hi.getNode()) - Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); + Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); switch (ExtType) { default: llvm_unreachable("Unknown extended-load op!"); @@ -778,11 +803,23 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::FMINNUM: case ISD::FMAXNUM: return ExpandFMINNUM_FMAXNUM(Op); + case ISD::UADDO: + case ISD::USUBO: + return ExpandUADDSUBO(Op); + case ISD::SADDO: + case ISD::SSUBO: + return ExpandSADDSUBO(Op); + case ISD::UMULO: + case ISD::SMULO: + return ExpandMULO(Op); case ISD::USUBSAT: case ISD::SSUBSAT: case ISD::UADDSAT: case ISD::SADDSAT: return ExpandAddSubSat(Op); + case ISD::SMULFIX: + case ISD::UMULFIX: + return ExpandFixedPointMul(Op); case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -808,6 +845,20 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: return ExpandStrictFPOp(Op); + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + return TLI.expandVecReduce(Op.getNode(), DAG); default: return DAG.UnrollVectorOp(Op.getNode()); } @@ -898,6 +949,19 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { EVT SrcVT = Src.getValueType(); int NumSrcElements = SrcVT.getVectorNumElements(); + // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector + // into a larger vector type. + if (SrcVT.bitsLE(VT)) { + assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && + "ANY_EXTEND_VECTOR_INREG vector size mismatch"); + NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); + SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), + NumSrcElements); + Src = DAG.getNode( + ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + // Build a base mask of undef shuffles. 
SmallVector<int, 16> ShuffleMask; ShuffleMask.resize(NumSrcElements, -1); @@ -945,6 +1009,19 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { EVT SrcVT = Src.getValueType(); int NumSrcElements = SrcVT.getVectorNumElements(); + // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector + // into a larger vector type. + if (SrcVT.bitsLE(VT)) { + assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && + "ZERO_EXTEND_VECTOR_INREG vector size mismatch"); + NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); + SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), + NumSrcElements); + Src = DAG.getNode( + ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + // Build up a zero vector to blend into this one. SDValue Zero = DAG.getConstant(0, DL, SrcVT); @@ -1212,12 +1289,58 @@ SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); } +SDValue VectorLegalizer::ExpandUADDSUBO(SDValue Op) { + SDValue Result, Overflow; + TLI.expandUADDSUBO(Op.getNode(), Result, Overflow, DAG); + + if (Op.getResNo() == 0) { + AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); + return Result; + } else { + AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); + return Overflow; + } +} + +SDValue VectorLegalizer::ExpandSADDSUBO(SDValue Op) { + SDValue Result, Overflow; + TLI.expandSADDSUBO(Op.getNode(), Result, Overflow, DAG); + + if (Op.getResNo() == 0) { + AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); + return Result; + } else { + AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); + return Overflow; + } +} + +SDValue VectorLegalizer::ExpandMULO(SDValue Op) { + SDValue Result, Overflow; + if (!TLI.expandMULO(Op.getNode(), Result, Overflow, DAG)) + std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Op.getNode()); + + if (Op.getResNo() == 0) { + AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); + return Result; + } else { + AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); + return Overflow; + } +} + SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) { if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG)) return Expanded; return DAG.UnrollVectorOp(Op.getNode()); } +SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) { + if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG)) + return Expanded; + return DAG.UnrollVectorOp(Op.getNode()); +} + SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { EVT VT = Op.getValueType(); EVT EltVT = VT.getVectorElementType(); @@ -1245,7 +1368,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { if (OperVT.isVector()) Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - EltVT, Oper, Idx); + OperVT.getVectorElementType(), Oper, Idx); Opers.push_back(Oper); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f367e9358576..7e4d52617977 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1,9 +1,8 @@ //===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -51,6 +50,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; @@ -69,6 +69,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND_VECTOR_INREG: R = ScalarizeVecRes_VecInregOp(N); break; + case ISD::ABS: case ISD::ANY_EXTEND: case ISD::BITREVERSE: case ISD::BSWAP: @@ -170,10 +171,21 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_EXTEND: R = ScalarizeVecRes_StrictFPOp(N); break; + case ISD::UADDO: + case ISD::SADDO: + case ISD::USUBO: + case ISD::SSUBO: + case ISD::UMULO: + case ISD::SMULO: + R = ScalarizeVecRes_OverflowOp(N, ResNo); + break; case ISD::SMULFIX: - R = ScalarizeVecRes_SMULFIX(N); + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + R = ScalarizeVecRes_MULFIX(N); break; } @@ -197,7 +209,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { Op0.getValueType(), Op0, Op1, Op2); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_SMULFIX(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_MULFIX(SDNode *N) { SDValue Op0 = GetScalarizedVector(N->getOperand(0)); SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = N->getOperand(2); @@ -235,6 +247,43 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { return Result; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N, + unsigned ResNo) { + SDLoc DL(N); + EVT ResVT = N->getValueType(0); + EVT OvVT = N->getValueType(1); + + SDValue ScalarLHS, ScalarRHS; + if (getTypeAction(ResVT) == TargetLowering::TypeScalarizeVector) { + ScalarLHS = GetScalarizedVector(N->getOperand(0)); + ScalarRHS = GetScalarizedVector(N->getOperand(1)); + } else { + SmallVector<SDValue, 1> ElemsLHS, ElemsRHS; + DAG.ExtractVectorElements(N->getOperand(0), ElemsLHS); + DAG.ExtractVectorElements(N->getOperand(1), ElemsRHS); + ScalarLHS = ElemsLHS[0]; + ScalarRHS = ElemsRHS[0]; + } + + SDVTList ScalarVTs = DAG.getVTList( + ResVT.getVectorElementType(), OvVT.getVectorElementType()); + SDNode *ScalarNode = DAG.getNode( + N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode(); + + // Replace the other vector result not being explicitly scalarized here. 
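+ // For instance, when ResNo == 0 the caller receives the scalarized result + // value and the overflow value (result number 1) is dealt with below.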
+ unsigned OtherNo = 1 - ResNo; + EVT OtherVT = N->getValueType(OtherNo); + if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) { + SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo)); + } else { + SDValue OtherVal = DAG.getNode( + ISD::SCALAR_TO_VECTOR, DL, OtherVT, SDValue(ScalarNode, OtherNo)); + ReplaceValueWith(SDValue(N, OtherNo), OtherVal); + } + + return SDValue(ScalarNode, ResNo); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); @@ -275,6 +324,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { NewVT, Op, N->getOperand(1)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), + { NewVT, MVT::Other }, + { N->getOperand(0), Op, N->getOperand(2) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); return DAG.getNode(ISD::FPOWI, SDLoc(N), @@ -558,9 +619,27 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::STRICT_FP_ROUND: + Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo); + break; case ISD::FP_ROUND: Res = ScalarizeVecOp_FP_ROUND(N, OpNo); break; + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Res = ScalarizeVecOp_VECREDUCE(N); + break; } } @@ -691,6 +770,28 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); } +SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1 && "Wrong operand for scalarization!"); + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), + { N->getValueType(0).getVectorElementType(), + MVT::Other }, + { N->getOperand(0), Elt, N->getOperand(2) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + +SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) { + SDValue Res = GetScalarizedVector(N->getOperand(0)); + // Result type may be wider than element type. 
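+ // (For instance, a vecreduce over <1 x i8> may carry an i32 result type, + // in which case the scalarized i8 element is any-extended.)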
+ if (Res.getValueType() != N->getValueType(0)) + Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res); + return Res; +} + //===----------------------------------------------------------------------===// // Result Vector Splitting //===----------------------------------------------------------------------===// @@ -748,6 +849,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; + case ISD::VAARG: + SplitVecRes_VAARG(N, Lo, Hi); + break; case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: @@ -755,6 +859,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_ExtVecInRegOp(N, Lo, Hi); break; + case ISD::ABS: case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: @@ -774,7 +879,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: @@ -859,8 +966,18 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FTRUNC: SplitVecRes_StrictFPOp(N, Lo, Hi); break; + case ISD::UADDO: + case ISD::SADDO: + case ISD::USUBO: + case ISD::SSUBO: + case ISD::UMULO: + case ISD::SMULO: + SplitVecRes_OverflowOp(N, ResNo, Lo, Hi); + break; case ISD::SMULFIX: - SplitVecRes_SMULFIX(N, Lo, Hi); + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + SplitVecRes_MULFIX(N, Lo, Hi); break; } @@ -899,8 +1016,7 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, Op0Hi, Op1Hi, Op2Hi); } -void DAGTypeLegalizer::SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDValue RHSLo, RHSHi; @@ -1205,6 +1321,104 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, ReplaceValueWith(SDValue(N, 1), Chain); } +SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) { + SDValue Chain = N->getOperand(0); + EVT VT = N->getValueType(0); + unsigned NE = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + SDLoc dl(N); + + SmallVector<SDValue, 8> Scalars; + SmallVector<SDValue, 4> Operands(N->getNumOperands()); + + // If ResNE is 0, fully unroll the vector op. + if (ResNE == 0) + ResNE = NE; + else if (NE > ResNE) + NE = ResNE; + + // The results of each unrolled operation, including the chain. + EVT ChainVTs[] = {EltVT, MVT::Other}; + SmallVector<SDValue, 8> Chains; + + unsigned i; + for (i = 0; i != NE; ++i) { + Operands[0] = Chain; + for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) { + SDValue Operand = N->getOperand(j); + EVT OperandVT = Operand.getValueType(); + if (OperandVT.isVector()) { + EVT OperandEltVT = OperandVT.getVectorElementType(); + Operands[j] = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, + DAG.getConstant(i, dl, TLI.getVectorIdxTy( + DAG.getDataLayout()))); + } else { + Operands[j] = Operand; + } + } + SDValue Scalar = DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands); + Scalar.getNode()->setFlags(N->getFlags()); + + // Add in the scalar as well as its chain value to the + // result vectors.
+ Scalars.push_back(Scalar); + Chains.push_back(Scalar.getValue(1)); + } + + for (; i < ResNE; ++i) + Scalars.push_back(DAG.getUNDEF(EltVT)); + + // Build a new factor node to connect the chain back together. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + ReplaceValueWith(SDValue(N, 1), Chain); + + // Create a new BUILD_VECTOR node + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE); + return DAG.getBuildVector(VecVT, dl, Scalars); +} + +void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + EVT ResVT = N->getValueType(0); + EVT OvVT = N->getValueType(1); + EVT LoResVT, HiResVT, LoOvVT, HiOvVT; + std::tie(LoResVT, HiResVT) = DAG.GetSplitDestVTs(ResVT); + std::tie(LoOvVT, HiOvVT) = DAG.GetSplitDestVTs(OvVT); + + SDValue LoLHS, HiLHS, LoRHS, HiRHS; + if (getTypeAction(ResVT) == TargetLowering::TypeSplitVector) { + GetSplitVector(N->getOperand(0), LoLHS, HiLHS); + GetSplitVector(N->getOperand(1), LoRHS, HiRHS); + } else { + std::tie(LoLHS, HiLHS) = DAG.SplitVectorOperand(N, 0); + std::tie(LoRHS, HiRHS) = DAG.SplitVectorOperand(N, 1); + } + + unsigned Opcode = N->getOpcode(); + SDVTList LoVTs = DAG.getVTList(LoResVT, LoOvVT); + SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT); + SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode(); + SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode(); + + Lo = SDValue(LoNode, ResNo); + Hi = SDValue(HiNode, ResNo); + + // Replace the other vector result not being explicitly split here. + unsigned OtherNo = 1 - ResNo; + EVT OtherVT = N->getValueType(OtherNo); + if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) { + SetSplitVector(SDValue(N, OtherNo), + SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo)); + } else { + SDValue OtherVal = DAG.getNode( + ISD::CONCAT_VECTORS, dl, OtherVT, + SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo)); + ReplaceValueWith(SDValue(N, OtherNo), OtherVal); + } +} + void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); @@ -1344,12 +1558,6 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, unsigned Alignment = MLD->getOriginalAlignment(); ISD::LoadExtType ExtType = MLD->getExtensionType(); - // if Alignment is equal to the vector size, - // take the half of it for the second part - unsigned SecondHalfAlignment = - (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? - Alignment/2 : Alignment; - // Split Mask operand SDValue MaskLo, MaskHi; if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) @@ -1381,7 +1589,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, MMO = DAG.getMachineFunction().getMachineMemOperand( MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad, - HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), + HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO, @@ -1496,15 +1704,34 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. - EVT InVT = N->getOperand(0).getValueType(); + unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; + EVT InVT = N->getOperand(OpNo).getValueType(); if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) - GetSplitVector(N->getOperand(0), Lo, Hi); + GetSplitVector(N->getOperand(OpNo), Lo, Hi); else - std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo); if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); + } else if (N->getOpcode() == ISD::STRICT_FP_ROUND) { + Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, + { N->getOperand(0), Lo, N->getOperand(2) }); + Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, + { N->getOperand(0), Hi, N->getOperand(2) }); + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewChain); + } else if (N->isStrictFPOpcode()) { + Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, + { N->getOperand(0), Lo }); + Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, + { N->getOperand(0), Hi }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewChain); } else { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); @@ -1669,6 +1896,26 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, } } +void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT OVT = N->getValueType(0); + EVT NVT = OVT.getHalfNumVectorElementsVT(*DAG.getContext()); + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(1); + SDValue SV = N->getOperand(2); + SDLoc dl(N); + + const unsigned Alignment = DAG.getDataLayout().getABITypeAlignment( + NVT.getTypeForEVT(*DAG.getContext())); + + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment); + Chain = Hi.getValue(1); + + // Modified the chain - switch anything that used the old chain to use + // the new one. 
+ ReplaceValueWith(SDValue(N, 1), Chain); +} + //===----------------------------------------------------------------------===// // Operand Vector Splitting @@ -1705,6 +1952,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::TRUNCATE: Res = SplitVecOp_TruncateHelper(N); break; + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; case ISD::STORE: @@ -1734,6 +1982,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: @@ -1775,7 +2024,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { if (Res.getNode() == N) return true; - assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + if (N->isStrictFPOpcode()) + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 && + "Invalid operand expansion"); + else + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && "Invalid operand expansion"); ReplaceValueWith(SDValue(N, 0), Res); @@ -1863,14 +2116,30 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { EVT ResVT = N->getValueType(0); SDValue Lo, Hi; SDLoc dl(N); - GetSplitVector(N->getOperand(0), Lo, Hi); + GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi); EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), InVT.getVectorNumElements()); - Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); + if (N->isStrictFPOpcode()) { + Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, + { N->getOperand(0), Lo }); + Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, + { N->getOperand(0), Hi }); + + // Build a factor node to remember that this operation is independent + // of the other one. + SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Ch); + } else { + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); + } return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } @@ -1920,7 +2189,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { if (isa<ConstantSDNode>(Idx)) { uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); - assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!"); SDValue Lo, Hi; GetSplitVector(Vec, Lo, Hi); @@ -2079,12 +2347,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, else std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); - // if Alignment is equal to the vector size, - // take the half of it for the second part - unsigned SecondHalfAlignment = - (Alignment == Data->getValueType(0).getSizeInBits()/8) ? - Alignment/2 : Alignment; - SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). 
getMachineMemOperand(N->getPointerInfo(), @@ -2101,7 +2363,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MMO = DAG.getMachineFunction().getMachineMemOperand( N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, - HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(), + HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, @@ -2343,14 +2605,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { EVT ResVT = N->getValueType(0); SDValue Lo, Hi; SDLoc DL(N); - GetSplitVector(N->getOperand(0), Lo, Hi); + GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi); EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), InVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); - Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); + if (N->isStrictFPOpcode()) { + Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, + { N->getOperand(0), Lo, N->getOperand(2) }); + Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, + { N->getOperand(0), Hi, N->getOperand(2) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewChain); + } else { + Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); + } return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); } @@ -2472,6 +2746,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_StrictFP(N); break; + case ISD::UADDO: + case ISD::SADDO: + case ISD::USUBO: + case ISD::SSUBO: + case ISD::UMULO: + case ISD::SMULO: + Res = WidenVecRes_OverflowOp(N, ResNo); + break; + case ISD::FCOPYSIGN: Res = WidenVecRes_FCOPYSIGN(N); break; @@ -2505,6 +2788,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; + case ISD::STRICT_FP_EXTEND: + case ISD::STRICT_FP_ROUND: + Res = WidenVecRes_Convert_StrictFP(N); + break; + case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -2523,13 +2811,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { // We're going to widen this vector op to a legal type by padding with undef // elements. If the wide vector op is eventually going to be expanded to // scalar libcalls, then unroll into scalar ops now to avoid unnecessary - // libcalls on the undef elements. We are assuming that if the scalar op - // requires expanding, then the vector op needs expanding too. + // libcalls on the undef elements. EVT VT = N->getValueType(0); - if (TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { - EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - assert(!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) && - "Target supports vector op, but scalar requires expansion?"); + EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) && + TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); break; } @@ -2539,11 +2825,14 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { // any other unary ops. 
LLVM_FALLTHROUGH; + case ISD::ABS: case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::FNEG: case ISD::FCANONICALIZE: Res = WidenVecRes_Unary(N); @@ -2593,14 +2882,13 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI, SDLoc dl(ConcatOps[0]); EVT WidenEltVT = WidenVT.getVectorElementType(); - int Idx = 0; // while (Some element of ConcatOps is not of type MaxVT) { // From the end of ConcatOps, collect elements of the same type and put // them into an op of the next larger supported type // } while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { - Idx = ConcatEnd - 1; + int Idx = ConcatEnd - 1; VT = ConcatOps[Idx--].getValueType(); while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) Idx--; @@ -2750,7 +3038,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { // No legal vector version so unroll the vector operation and then widen. if (NumElts == 1) - return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements()); // Since the operation can trap, apply operation on the original vector. EVT MaxVT = VT; @@ -2846,6 +3134,58 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT); } +SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) { + SDLoc DL(N); + EVT ResVT = N->getValueType(0); + EVT OvVT = N->getValueType(1); + EVT WideResVT, WideOvVT; + SDValue WideLHS, WideRHS; + + // TODO: This might result in a widen/split loop. + if (ResNo == 0) { + WideResVT = TLI.getTypeToTransformTo(*DAG.getContext(), ResVT); + WideOvVT = EVT::getVectorVT( + *DAG.getContext(), OvVT.getVectorElementType(), + WideResVT.getVectorNumElements()); + + WideLHS = GetWidenedVector(N->getOperand(0)); + WideRHS = GetWidenedVector(N->getOperand(1)); + } else { + WideOvVT = TLI.getTypeToTransformTo(*DAG.getContext(), OvVT); + WideResVT = EVT::getVectorVT( + *DAG.getContext(), ResVT.getVectorElementType(), + WideOvVT.getVectorNumElements()); + + SDValue Zero = DAG.getConstant( + 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())); + WideLHS = DAG.getNode( + ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT), + N->getOperand(0), Zero); + WideRHS = DAG.getNode( + ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT), + N->getOperand(1), Zero); + } + + SDVTList WideVTs = DAG.getVTList(WideResVT, WideOvVT); + SDNode *WideNode = DAG.getNode( + N->getOpcode(), DL, WideVTs, WideLHS, WideRHS).getNode(); + + // Replace the other vector result not being explicitly widened here. 
+ unsigned OtherNo = 1 - ResNo; + EVT OtherVT = N->getValueType(OtherNo); + if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) { + SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo)); + } else { + SDValue Zero = DAG.getConstant( + 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())); + SDValue OtherVal = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero); + ReplaceValueWith(SDValue(N, OtherNo), OtherVal); + } + + return SDValue(WideNode, ResNo); +} + SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InOp = N->getOperand(0); SDLoc DL(N); @@ -2929,6 +3269,43 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getBuildVector(WidenVT, DL, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { + SDValue InOp = N->getOperand(1); + SDLoc DL(N); + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SmallVector<EVT, 2> WidenVTs = { WidenVT, MVT::Other }; + + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + + unsigned Opcode = N->getOpcode(); + + // FIXME: Optimizations need to be implemented here. + + // Otherwise unroll into some nasty scalar code and rebuild the vector. + EVT EltVT = WidenVT.getVectorElementType(); + SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other }; + SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT)); + SmallVector<SDValue, 32> OpChains; + // Use the original element count so we don't do more scalar ops than + // necessary. + unsigned MinElts = N->getValueType(0).getVectorNumElements(); + for (unsigned i=0; i < MinElts; ++i) { + NewOps[1] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps); + OpChains.push_back(Ops[i].getValue(1)); + } + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains); + ReplaceValueWith(SDValue(N, 1), NewChain); + + return DAG.getBuildVector(WidenVT, DL, Ops); +} + SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { unsigned Opcode = N->getOpcode(); SDValue InOp = N->getOperand(0); @@ -3654,8 +4031,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { return Res; } - InOp1 = GetWidenedVector(InOp1); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + // If the inputs also widen, handle them directly. Otherwise widen by hand. + SDValue InOp2 = N->getOperand(1); + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { + InOp1 = GetWidenedVector(InOp1); + InOp2 = GetWidenedVector(InOp2); + } else { + InOp1 = DAG.WidenVector(InOp1, SDLoc(N)); + InOp2 = DAG.WidenVector(InOp2, SDLoc(N)); + } // Assume that the input and output will be widened appropriately. If not, // we will have to unroll it at some point.
@@ -3698,6 +4082,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break; case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; case ISD::ANY_EXTEND: @@ -3707,6 +4092,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { break; case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: @@ -3714,6 +4100,22 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::TRUNCATE: Res = WidenVecOp_Convert(N); break; + + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Res = WidenVecOp_VECREDUCE(N); + break; } // If Res is null, the sub-method took care of registering the result. @@ -3725,8 +4127,12 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { return true; - assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand expansion"); + if (N->isStrictFPOpcode()) + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 && + "Invalid operand expansion"); + else + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); ReplaceValueWith(SDValue(N, 0), Res); return false; @@ -3806,7 +4212,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { EVT EltVT = VT.getVectorElementType(); SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); - SDValue InOp = N->getOperand(0); + SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0); assert(getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector && "Unexpected type action"); @@ -3815,10 +4221,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); // See if a widened result type would be legal, if so widen the node. + // FIXME: This isn't safe for StrictFP. Other optimization here is needed. EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, InVT.getVectorNumElements()); - if (TLI.isTypeLegal(WideVT)) { - SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp); + if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) { + SDValue Res; + if (N->isStrictFPOpcode()) { + Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, + { N->getOperand(0), InOp }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + } else + Res = DAG.getNode(Opcode, dl, WideVT, InOp); return DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, Res, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); @@ -3828,12 +4243,26 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Unroll the convert into some scalar code and create a nasty build vector. 
SmallVector<SDValue, 16> Ops(NumElts); - for (unsigned i=0; i < NumElts; ++i) - Ops[i] = DAG.getNode( - Opcode, dl, EltVT, - DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + if (N->isStrictFPOpcode()) { + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + SmallVector<SDValue, 32> OpChains; + for (unsigned i=0; i < NumElts; ++i) { + NewOps[1] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps); + OpChains.push_back(Ops[i].getValue(1)); + } + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); + ReplaceValueWith(SDValue(N, 1), NewChain); + } else { + for (unsigned i = 0; i < NumElts; ++i) + Ops[i] = DAG.getNode( + Opcode, dl, EltVT, + DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + } return DAG.getBuildVector(VT, dl, Ops); } @@ -3859,6 +4288,24 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { } } + // Handle a case like bitcast v12i8 -> v3i32. Normally that would get widened + // to v16i8 -> v4i32, but for a target where v3i32 is legal but v12i8 is not, + // we end up here. Handling the case here with EXTRACT_SUBVECTOR avoids + // having to copy via memory. + if (VT.isVector()) { + EVT EltVT = VT.getVectorElementType(); + unsigned EltSize = EltVT.getSizeInBits(); + if (InWidenSize % EltSize == 0) { + unsigned NewNumElts = InWidenSize / EltSize; + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + } + } + return CreateStackStoreLoad(InOp, VT); } @@ -4000,10 +4447,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { SDValue Index = MSC->getIndex(); SDValue Scale = MSC->getScale(); - unsigned NumElts; if (OpNo == 1) { DataOp = GetWidenedVector(DataOp); - NumElts = DataOp.getValueType().getVectorNumElements(); + unsigned NumElts = DataOp.getValueType().getVectorNumElements(); // Widen index. EVT IndexVT = Index.getValueType(); @@ -4041,8 +4487,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { // Get a new SETCC node to compare the newly widened operands. // Only some of the compared elements are legal. - EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), - InOp0.getValueType()); + EVT SVT = getSetCCResultType(InOp0.getValueType()); // The result type is legal; if it's vXi1, keep vXi1 for the new SETCC.
if (VT.getScalarType() == MVT::i1) SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, @@ -4062,6 +4507,80 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { return PromoteTargetBoolean(CC, VT); } +SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { + SDLoc dl(N); + SDValue Op = GetWidenedVector(N->getOperand(0)); + EVT OrigVT = N->getOperand(0).getValueType(); + EVT WideVT = Op.getValueType(); + EVT ElemVT = OrigVT.getVectorElementType(); + + SDValue NeutralElem; + switch (N->getOpcode()) { + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_UMAX: + NeutralElem = DAG.getConstant(0, dl, ElemVT); + break; + case ISD::VECREDUCE_MUL: + NeutralElem = DAG.getConstant(1, dl, ElemVT); + break; + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_UMIN: + NeutralElem = DAG.getAllOnesConstant(dl, ElemVT); + break; + case ISD::VECREDUCE_SMAX: + NeutralElem = DAG.getConstant( + APInt::getSignedMinValue(ElemVT.getSizeInBits()), dl, ElemVT); + break; + case ISD::VECREDUCE_SMIN: + NeutralElem = DAG.getConstant( + APInt::getSignedMaxValue(ElemVT.getSizeInBits()), dl, ElemVT); + break; + case ISD::VECREDUCE_FADD: + NeutralElem = DAG.getConstantFP(0.0, dl, ElemVT); + break; + case ISD::VECREDUCE_FMUL: + NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT); + break; + case ISD::VECREDUCE_FMAX: + NeutralElem = DAG.getConstantFP( + -std::numeric_limits<double>::infinity(), dl, ElemVT); + break; + case ISD::VECREDUCE_FMIN: + NeutralElem = DAG.getConstantFP( + std::numeric_limits<double>::infinity(), dl, ElemVT); + break; + } + + // Pad the vector with the neutral element. + unsigned OrigElts = OrigVT.getVectorNumElements(); + unsigned WideElts = WideVT.getVectorNumElements(); + for (unsigned Idx = OrigElts; Idx < WideElts; Idx++) + Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags()); +} + +SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) { + // This only gets called in the case that the left and right inputs and + // result are of a legal odd vector type, and the condition is an illegal i1 + // vector of the same odd width that needs widening.
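+ // E.g. for a legal v3i32 result: the v3i1 condition is widened to v4i1, the + // v3i32 inputs are widened to v4i32 (padding with undef), the select is done + // as v4i32, and the v3i32 result is extracted at subvector index 0.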
+ EVT VT = N->getValueType(0); + assert(VT.isVector() && !VT.isPow2VectorType() && isTypeLegal(VT)); + + SDValue Cond = GetWidenedVector(N->getOperand(0)); + SDValue LeftIn = DAG.WidenVector(N->getOperand(1), SDLoc(N)); + SDValue RightIn = DAG.WidenVector(N->getOperand(2), SDLoc(N)); + SDLoc DL(N); + + SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond, + LeftIn, RightIn); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, VT, Select, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); +} //===----------------------------------------------------------------------===// // Vector Widening Utilities @@ -4102,6 +4621,8 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + if (MemVTWidth == WidenWidth) + return MemVT; RetVT = MemVT; break; } @@ -4113,7 +4634,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { EVT MemVT = (MVT::SimpleValueType) VT; unsigned MemVTWidth = MemVT.getSizeInBits(); - if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); + if ((Action == TargetLowering::TypeLegal || + Action == TargetLowering::TypePromoteInteger) && + WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 7f369c746d24..34660e3a48ec 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -1,9 +1,8 @@ //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -85,6 +84,7 @@ ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { case ISD::CopyFromReg: NumberDeps++; break; case ISD::CopyToReg: break; case ISD::INLINEASM: break; + case ISD::INLINEASM_BR: break; } if (!ScegN->isMachineOpcode()) continue; @@ -121,6 +121,7 @@ unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, case ISD::CopyFromReg: break; case ISD::CopyToReg: NumberDeps++; break; case ISD::INLINEASM: break; + case ISD::INLINEASM_BR: break; } if (!ScegN->isMachineOpcode()) continue; @@ -446,6 +447,7 @@ int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { break; case ISD::INLINEASM: + case ISD::INLINEASM_BR: ResCount += PriorityThree; break; } @@ -548,6 +550,7 @@ void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { NodeNumDefs++; break; case ISD::INLINEASM: + case ISD::INLINEASM_BR: NodeNumDefs++; break; } diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index f7566b246f32..65b9d017fc5c 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -136,7 +135,8 @@ public: /// dbg.addr is emitted twice. void clearIsEmitted() { Emitted = false; } - LLVM_DUMP_METHOD void dump(raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; + LLVM_DUMP_METHOD void print(raw_ostream &OS) const; }; /// Holds the information from a dbg_label node through SDISel. diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 90e109b022fd..2cb850fa1a3d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -1,9 +1,8 @@ //===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -480,7 +479,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, } for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { - if (Node->getOpcode() == ISD::INLINEASM) { + if (Node->getOpcode() == ISD::INLINEASM || + Node->getOpcode() == ISD::INLINEASM_BR) { // Inline asm can clobber physical defs. 
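+ // (ISD::INLINEASM_BR is the node used for 'asm goto'; it shares INLINEASM's + // operand layout, so its register clobbers are discovered the same way.)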
unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8d75b8133a30..34b4c8502353 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1,9 +1,8 @@ //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -220,6 +219,14 @@ public: return Topo.WillCreateCycle(SU, TargetSU); } + /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU. + /// Does *NOT* update the topological ordering! It just queues an update. + void AddPredQueued(SUnit *SU, const SDep &D) { + Topo.AddPredQueued(SU, D.getSUnit()); + SU->addPred(D); + } + /// AddPred - adds a predecessor edge to SUnit SU. /// This returns true if this is a new predecessor. /// Updates the topological ordering if required. @@ -267,24 +274,22 @@ private: void ListScheduleBottomUp(); /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. - /// Updates the topological ordering if required. SUnit *CreateNewSUnit(SDNode *N) { unsigned NumSUnits = SUnits.size(); SUnit *NewNode = newSUnit(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) - Topo.InitDAGTopologicalSorting(); + Topo.MarkDirty(); return NewNode; } /// CreateClone - Creates a new SUnit from an existing one. - /// Updates the topological ordering if required. SUnit *CreateClone(SUnit *N) { unsigned NumSUnits = SUnits.size(); SUnit *NewNode = Clone(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) - Topo.InitDAGTopologicalSorting(); + Topo.MarkDirty(); return NewNode; } @@ -366,7 +371,7 @@ void ScheduleDAGRRList::Schedule() { BuildSchedGraph(nullptr); LLVM_DEBUG(dump()); - Topo.InitDAGTopologicalSorting(); + Topo.MarkDirty(); AvailableQueue->initNodes(SUnits); @@ -709,6 +714,7 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { // removed. return; case ISD::INLINEASM: + case ISD::INLINEASM_BR: // For inline asm, clear the pipeline state. HazardRec->Reset(); return; @@ -1017,8 +1023,9 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { NewSU = &SUnits[N->getNodeId()]; // If NewSU has already been scheduled, we need to clone it, but this // negates the benefit of unfolding so just return SU.
- if (NewSU->isScheduled) + if (NewSU->isScheduled) { return SU; + } isNewN = false; } else { NewSU = CreateNewSUnit(N); @@ -1071,23 +1078,23 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { for (const SDep &Pred : ChainPreds) { RemovePred(SU, Pred); if (isNewLoad) - AddPred(LoadSU, Pred); + AddPredQueued(LoadSU, Pred); } for (const SDep &Pred : LoadPreds) { RemovePred(SU, Pred); if (isNewLoad) - AddPred(LoadSU, Pred); + AddPredQueued(LoadSU, Pred); } for (const SDep &Pred : NodePreds) { RemovePred(SU, Pred); - AddPred(NewSU, Pred); + AddPredQueued(NewSU, Pred); } for (SDep D : NodeSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); D.setSUnit(NewSU); - AddPred(SuccDep, D); + AddPredQueued(SuccDep, D); // Balance register pressure. if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) @@ -1099,7 +1106,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { RemovePred(SuccDep, D); if (isNewLoad) { D.setSUnit(LoadSU); - AddPred(SuccDep, D); + AddPredQueued(SuccDep, D); } } @@ -1107,7 +1114,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { // by LoadSU. SDep D(LoadSU, SDep::Data, 0); D.setLatency(LoadSU->Latency); - AddPred(NewSU, D); + AddPredQueued(NewSU, D); if (isNewLoad) AvailableQueue->addNode(LoadSU); @@ -1179,7 +1186,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { // New SUnit has the exact same predecessors. for (SDep &Pred : SU->Preds) if (!Pred.isArtificial()) - AddPred(NewSU, Pred); + AddPredQueued(NewSU, Pred); // Only copy scheduled successors. Cut them from old node's successor // list and move them over. @@ -1191,7 +1198,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (SuccSU->isScheduled) { SDep D = Succ; D.setSUnit(NewSU); - AddPred(SuccSU, D); + AddPredQueued(SuccSU, D); D.setSUnit(SU); DelDeps.push_back(std::make_pair(SuccSU, D)); } @@ -1230,14 +1237,14 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, if (SuccSU->isScheduled) { SDep D = Succ; D.setSUnit(CopyToSU); - AddPred(SuccSU, D); + AddPredQueued(SuccSU, D); DelDeps.push_back(std::make_pair(SuccSU, Succ)); } else { // Avoid scheduling the def-side copy before other successors. Otherwise // we could introduce another physreg interference on the copy and // continue inserting copies indefinitely. - AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial)); + AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial)); } } for (auto &DelDep : DelDeps) @@ -1245,10 +1252,10 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, SDep FromDep(SU, SDep::Data, Reg); FromDep.setLatency(SU->Latency); - AddPred(CopyFromSU, FromDep); + AddPredQueued(CopyFromSU, FromDep); SDep ToDep(CopyFromSU, SDep::Data, 0); ToDep.setLatency(CopyFromSU->Latency); - AddPred(CopyToSU, ToDep); + AddPredQueued(CopyToSU, ToDep); AvailableQueue->updateNode(SU); AvailableQueue->addNode(CopyFromSU); @@ -1348,7 +1355,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { } for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { - if (Node->getOpcode() == ISD::INLINEASM) { + if (Node->getOpcode() == ISD::INLINEASM || + Node->getOpcode() == ISD::INLINEASM_BR) { // Inline asm can clobber physical defs. 
unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) @@ -1477,6 +1485,11 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (CurSU) return CurSU; + // We query the topological order in the loop body, so make sure outstanding + // updates are applied before entering it (we only enter the loop if there + // are some interferences). If we make changes to the ordering, we exit + // the loop. + // All candidates are delayed due to live physical reg dependencies. // Try backtracking, code duplication, or inserting cross class copies // to resolve it. @@ -1506,7 +1519,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { } LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" << TrySU->NodeNum << ")\n"); - AddPred(TrySU, SDep(BtSU, SDep::Artificial)); + AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial)); // If one or more successors have been unscheduled, then the current // node is no longer available. @@ -1560,14 +1573,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); - AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); + AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; - AddPred(NewDef, SDep(TrySU, SDep::Artificial)); + AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; CurSU = NewDef; } @@ -2939,6 +2952,29 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { (cast<RegisterSDNode>(N->getOperand(1))->getReg())) continue; + SDNode *PredFrameSetup = nullptr; + for (const SDep &Pred : SU.Preds) + if (Pred.isCtrl() && Pred.getSUnit()) { + // Find a control predecessor that is not a data dependence. + SDNode *PredND = Pred.getSUnit()->getNode(); + + // If PredND is a FrameSetup, we should not pre-schedule the node, + // or else, when scheduling bottom-up, ADJCALLSTACKDOWN and + // ADJCALLSTACKUP may hold CallResource too long and prevent other + // calls from being scheduled. If there is no other available node + // to schedule, the scheduler will try to rename the register by + // creating a copy to avoid the conflict, which will fail because + // CallResource is not a real physical register. + if (PredND && PredND->isMachineOpcode() && + (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) { + PredFrameSetup = PredND; + break; + } + } + // Skip nodes that have a FrameSetup parent. + if (PredFrameSetup != nullptr) + continue; + // Locate the single data predecessor.
SUnit *PredSU = nullptr; for (const SDep &Pred : SU.Preds) @@ -2993,9 +3029,9 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { if (SuccSU != &SU) { Edge.setSUnit(PredSU); scheduleDAG->RemovePred(SuccSU, Edge); - scheduleDAG->AddPred(&SU, Edge); + scheduleDAG->AddPredQueued(&SU, Edge); Edge.setSUnit(&SU); - scheduleDAG->AddPred(SuccSU, Edge); + scheduleDAG->AddPredQueued(SuccSU, Edge); --i; } } @@ -3077,7 +3113,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { LLVM_DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); - scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial)); + scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial)); } } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index e258f0a218a5..568c6191e512 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -1,9 +1,8 @@ //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -206,6 +205,19 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { if (!Chain) return; + // Skip any load instruction that has a tied input. There may be an additional + // dependency requiring a different order than by increasing offsets, and the + // added glue may introduce a cycle. + auto hasTiedInput = [this](const SDNode *N) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned I = 0; I != MCID.getNumOperands(); ++I) { + if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1) + return true; + } + + return false; + }; + // Look for other loads of the same chain. Find loads that are loading from // the same base pointer and different offsets. SmallPtrSet<SDNode*, 16> Visited; @@ -213,6 +225,10 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode. bool Cluster = false; SDNode *Base = Node; + + if (hasTiedInput(Base)) + return; + // This algorithm requires a reasonably low use count before finding a match // to avoid uselessly blowing up compile time in large blocks. unsigned UseCount = 0; @@ -223,10 +239,12 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { continue; int64_t Offset1, Offset2; if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || - Offset1 == Offset2) + Offset1 == Offset2 || + hasTiedInput(User)) { // FIXME: Should be ok if their addresses are identical. But earlier // optimizations really should have eliminated one of the loads.
continue; + } if (O2SMap.insert(std::make_pair(Offset1, Base)).second) Offsets.push_back(Offset1); O2SMap.insert(std::make_pair(Offset2, User)); @@ -741,28 +759,27 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, DenseMap<SDValue, unsigned> &VRBaseMap, - SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, - SmallSet<unsigned, 8> &Seen) { + SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders, + SmallSet<unsigned, 8> &Seen, MachineInstr *NewInsn) { unsigned Order = N->getIROrder(); - if (!Order || !Seen.insert(Order).second) { + if (!Order || Seen.count(Order)) { // Process any valid SDDbgValues even if node does not have any order // assigned. ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0); return; } - MachineBasicBlock *BB = Emitter.getBlock(); - auto IP = Emitter.getInsertPos(); - if (IP == BB->begin() || BB->back().isPHI() || - // Fast-isel may have inserted some instructions, in which case the - // BB->back().isPHI() test will not fire when we want it to. - std::prev(IP)->isPHI()) { - // Did not insert any instruction. - Orders.push_back({Order, (MachineInstr *)nullptr}); - return; + // If a new instruction was generated for this Order number, record it. + // Otherwise, leave this order number unseen: we will either find later + // instructions for it, or leave it unseen if there were no instructions at + // all. + if (NewInsn) { + Seen.insert(Order); + Orders.push_back({Order, NewInsn}); } - Orders.push_back({Order, &*std::prev(IP)}); + // Even if no instruction was generated, a Value may have become defined via + // earlier nodes. Try to process them now. ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } @@ -815,6 +832,43 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { SmallSet<unsigned, 8> Seen; bool HasDbg = DAG->hasDebugValues(); + // Emit a node, and determine where its first instruction is for debuginfo. + // Zero, one, or multiple instructions can be created when emitting a node. + auto EmitNode = + [&](SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * { + // Fetch the instruction prior to this one, or end() if nonexistent. + auto GetPrevInsn = [&](MachineBasicBlock::iterator I) { + if (I == BB->begin()) + return BB->end(); + else + return std::prev(I); + }; + + MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos()); + Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap); + MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos()); + + // If the iterator did not change, no instructions were inserted. + if (Before == After) + return nullptr; + + MachineInstr *MI; + if (Before == BB->end()) { + // There were no prior instructions; the new ones must start at the + // beginning of the block. + MI = &Emitter.getBlock()->instr_front(); + } else { + // Return the first instruction after the pre-existing instructions. + MI = &*std::next(Before); + } + + if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues) + MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node)); + + return MI; + }; + // If this is the first BB, emit byval parameter dbg_value's.
if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) { SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin(); @@ -851,18 +905,18 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { GluedNodes.push_back(N); while (!GluedNodes.empty()) { SDNode *N = GluedNodes.back(); - Emitter.EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap); + auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap); // Remember the source order of the inserted instruction. if (HasDbg) - ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen); + ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); GluedNodes.pop_back(); } - Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, - VRBaseMap); + auto NewInsn = + EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); // Remember the source order of the inserted instruction. if (HasDbg) - ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, - Seen); + ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen, + NewInsn); } // Insert all the dbg_values which have not already been inserted in source @@ -873,7 +927,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Sort the source order instructions and use the order to insert debug // values. Use stable_sort so that DBG_VALUEs are inserted in the same order // regardless of the host's implementation of std::sort. - std::stable_sort(Orders.begin(), Orders.end(), less_first()); + llvm::stable_sort(Orders, less_first()); std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(), [](const SDDbgValue *LHS, const SDDbgValue *RHS) { return LHS->getOrder() < RHS->getOrder(); @@ -887,8 +941,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { unsigned Order = Orders[i].first; MachineInstr *MI = Orders[i].second; // Insert all SDDbgValue's whose order(s) are before "Order". - if (!MI) - continue; + assert(MI); for (; DI != DE; ++DI) { if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order) break; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 3fa7ad895725..5163b4fa4fd3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -1,9 +1,8 @@ //===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 416061475b1a..ab06b55b49fd 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -1,9 +1,8 @@ //===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 647496c1afcb..5852e693fa9f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1,9 +1,8 @@ //===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -86,6 +85,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { // Default null implementations of the callbacks. void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} +void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {} void SelectionDAG::DAGNodeDeletedListener::anchor() {} @@ -262,12 +262,7 @@ bool ISD::allOperandsUndef(const SDNode *N) { // is probably the desired behavior. if (N->getNumOperands() == 0) return false; - - for (const SDValue &Op : N->op_values()) - if (!Op.isUndef()) - return false; - - return true; + return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); }); } bool ISD::matchUnaryPredicate(SDValue Op, @@ -299,8 +294,8 @@ bool ISD::matchUnaryPredicate(SDValue Op, bool ISD::matchBinaryPredicate( SDValue LHS, SDValue RHS, std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match, - bool AllowUndefs) { - if (LHS.getValueType() != RHS.getValueType()) + bool AllowUndefs, bool AllowTypeMismatch) { + if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType()) return false; // TODO: Add support for scalar UNDEF cases? 
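+ // When AllowTypeMismatch is set, LHS/RHS (and their splat elements) may have + // different types, and the Match callback becomes responsible for any type + // checks it needs.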
@@ -323,8 +318,8 @@ bool ISD::matchBinaryPredicate( auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp); if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef)) return false; - if (LHSOp.getValueType() != SVT || - LHSOp.getValueType() != RHSOp.getValueType()) + if (!AllowTypeMismatch && (LHSOp.getValueType() != SVT || + LHSOp.getValueType() != RHSOp.getValueType())) return false; if (!Match(LHSCst, RHSCst)) return false; @@ -518,6 +513,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::TargetFrameIndex: ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); break; + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: + if (cast<LifetimeSDNode>(N)->hasOffset()) { + ID.AddInteger(cast<LifetimeSDNode>(N)->getSize()); + ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset()); + } + break; case ISD::JumpTable: case ISD::TargetJumpTable: ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); @@ -834,6 +836,8 @@ void SelectionDAG::InsertNode(SDNode *N) { N->PersistentId = NextPersistentId++; VerifySDNode(N); #endif + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeInserted(N); } /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that @@ -1136,6 +1140,18 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { getConstant(Imm, DL, Op.getValueType())); } +SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { + // Only unsigned pointer semantics are supported right now. In the future this + // might delegate to TLI to check pointer signedness. + return getZExtOrTrunc(Op, DL, VT); +} + +SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { + // Only unsigned pointer semantics are supported right now. In the future this + // might delegate to TLI to check pointer signedness. + return getZeroExtendInReg(Op, DL, VT); +} + /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); @@ -1274,6 +1290,12 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL, return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget); } +SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT, + const SDLoc &DL, bool LegalTypes) { + EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes); + return getConstant(Val, DL, ShiftVT); +} + SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT, bool isTarget) { return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); @@ -1403,7 +1425,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = MF->getFunction().optForSize() + Alignment = MF->getFunction().hasOptSize() ? getDataLayout().getABITypeAlignment(C->getType()) : getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; @@ -1770,7 +1792,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<LabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label); + auto *N = + newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -1965,10 +1988,30 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, case ISD::SETUO: case ISD::SETUEQ: case ISD::SETUNE: - assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!"); + assert(!OpVT.isInteger() && "Illegal setcc for integer!"); break; } + if (OpVT.isInteger()) { + // For EQ and NE, we can always pick a value for the undef to make the + // predicate pass or fail, so we can return undef. + // Matches behavior in llvm::ConstantFoldCompareInstruction. + // icmp eq/ne X, undef -> undef. + if ((N1.isUndef() || N2.isUndef()) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) + return getUNDEF(VT); + + // If both operands are undef, we can return undef for int comparison. + // icmp undef, undef -> undef. + if (N1.isUndef() && N2.isUndef()) + return getUNDEF(VT); + + // icmp X, X -> true/false + // icmp X, undef -> true/false because undef could be X. + if (N1 == N2) + return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT); + } + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) { const APInt &C2 = N2C->getAPIntValue(); if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { @@ -1989,71 +2032,88 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, } } } - if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) { - if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) { - APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); - switch (Cond) { - default: break; - case ISD::SETEQ: if (R==APFloat::cmpUnordered) - return getUNDEF(VT); - LLVM_FALLTHROUGH; - case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, - OpVT); - case ISD::SETNE: if (R==APFloat::cmpUnordered) - return getUNDEF(VT); - LLVM_FALLTHROUGH; - case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpLessThan, dl, VT, - OpVT); - case ISD::SETLT: if (R==APFloat::cmpUnordered) - return getUNDEF(VT); - LLVM_FALLTHROUGH; - case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, - OpVT); - case ISD::SETGT: if (R==APFloat::cmpUnordered) - return getUNDEF(VT); - LLVM_FALLTHROUGH; - case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, - VT, OpVT); - case ISD::SETLE: if (R==APFloat::cmpUnordered) - return getUNDEF(VT); - LLVM_FALLTHROUGH; - case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || - R==APFloat::cmpEqual, dl, VT, - OpVT); - case ISD::SETGE: if (R==APFloat::cmpUnordered) - return getUNDEF(VT); - LLVM_FALLTHROUGH; - case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpEqual, dl, VT, OpVT); - case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, - OpVT); - case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, - OpVT); - case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpEqual, dl, VT, - OpVT); - case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, - OpVT); - case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan, dl, VT, - OpVT); - case ISD::SETUGT: return 
getBoolConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpUnordered, dl, VT, - OpVT); - case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, - VT, OpVT); - case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, - OpVT); - } - } else { - // Ensure that the constant occurs on the RHS. - ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); - MVT CompVT = N1.getValueType().getSimpleVT(); - if (!TLI->isCondCodeLegal(SwappedCond, CompVT)) - return SDValue(); - return getSetCC(dl, VT, N2, N1, SwappedCond); + auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2); + + if (N1CFP && N2CFP) { + APFloat::cmpResult R = N1CFP->getValueAPF().compare(N2CFP->getValueAPF()); + switch (Cond) { + default: break; + case ISD::SETEQ: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETNE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, dl, VT, + OpVT); + case ISD::SETLT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, + OpVT); + case ISD::SETGT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, + VT, OpVT); + case ISD::SETLE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETGE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, dl, VT, OpVT); + case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, dl, VT, + OpVT); + case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, + VT, OpVT); + case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, + OpVT); + } + } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) { + // Ensure that the constant occurs on the RHS. + ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); + if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT())) + return SDValue(); + return getSetCC(dl, VT, N2, N1, SwappedCond); + } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) || + (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) { + // If an operand is known to be a NaN (or an undef that could be a NaN), we + // can fold the comparison. + // Choosing NaN for the undef will always make the unordered comparison + // succeed and the ordered comparison fail. + // Matches behavior in llvm::ConstantFoldCompareInstruction. + switch (ISD::getUnorderedFlavor(Cond)) { + default: + llvm_unreachable("Unknown flavor!"); + case 0: // Known false.
+ return getBoolConstant(false, dl, VT, OpVT); + case 1: // Known true. + return getBoolConstant(true, dl, VT, OpVT); + case 2: // Undefined. + return getUNDEF(VT); } } @@ -2062,16 +2122,32 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, } /// See if the specified operand can be simplified with the knowledge that only -/// the bits specified by Mask are used. -SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) { +/// the bits specified by DemandedBits are used. +/// TODO: really we should be making this into the DAG equivalent of +/// SimplifyMultipleUseDemandedBits and not generate any new nodes. +SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { + EVT VT = V.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return GetDemandedBits(V, DemandedBits, DemandedElts); +} + +/// See if the specified operand can be simplified with the knowledge that only +/// the bits specified by DemandedBits are used in the elements specified by +/// DemandedElts. +/// TODO: really we should be making this into the DAG equivalent of +/// SimplifyMultipleUseDemandedBits and not generate any new nodes. +SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, + const APInt &DemandedElts) { switch (V.getOpcode()) { default: break; case ISD::Constant: { - const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); + auto *CV = cast<ConstantSDNode>(V.getNode()); assert(CV && "Const value should be ConstSDNode."); const APInt &CVal = CV->getAPIntValue(); - APInt NewVal = CVal & Mask; + APInt NewVal = CVal & DemandedBits; if (NewVal != CVal) return getConstant(NewVal, SDLoc(V), V.getValueType()); break; @@ -2079,44 +2155,51 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) { case ISD::OR: case ISD::XOR: // If the LHS or RHS don't contribute bits to the or, drop them. - if (MaskedValueIsZero(V.getOperand(0), Mask)) + if (MaskedValueIsZero(V.getOperand(0), DemandedBits)) return V.getOperand(1); - if (MaskedValueIsZero(V.getOperand(1), Mask)) + if (MaskedValueIsZero(V.getOperand(1), DemandedBits)) return V.getOperand(0); break; case ISD::SRL: // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) break; - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { + if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { // See if we can recursively simplify the LHS. unsigned Amt = RHSC->getZExtValue(); // Watch out for shift count overflow though. - if (Amt >= Mask.getBitWidth()) + if (Amt >= DemandedBits.getBitWidth()) break; - APInt NewMask = Mask << Amt; - if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask)) + APInt SrcDemandedBits = DemandedBits << Amt; + if (SDValue SimplifyLHS = + GetDemandedBits(V.getOperand(0), SrcDemandedBits)) return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } break; case ISD::AND: { // X & -1 -> X (ignoring bits which aren't demanded). - ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1)); - if (AndVal && Mask.isSubsetOf(AndVal->getAPIntValue())) - return V.getOperand(0); + // Also handle the case where masked out bits in X are known to be zero. 
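+ // E.g. if X is (zext i8 Y to i32) then (and X, 0xFFF) simplifies to X: every + // bit the mask would clear (bits 12-31) is already known to be zero in X.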
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) { + const APInt &AndVal = RHSC->getAPIntValue(); + if (DemandedBits.isSubsetOf(AndVal) || + DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero | + AndVal)) + return V.getOperand(0); + } break; } case ISD::ANY_EXTEND: { SDValue Src = V.getOperand(0); unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); // Being conservative here - only peek through if we only demand bits in the - // non-extended source (even though the extended bits are technically undef). - if (Mask.getActiveBits() > SrcBitWidth) + // non-extended source (even though the extended bits are technically + // undef). + if (DemandedBits.getActiveBits() > SrcBitWidth) break; - APInt SrcMask = Mask.trunc(SrcBitWidth); - if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask)) + APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth); + if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits)) return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); break; } @@ -2125,7 +2208,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) { unsigned ExVTBits = ExVT.getScalarSizeInBits(); // If none of the extended bits are demanded, eliminate the sextinreg. - if (Mask.getActiveBits() <= ExVTBits) + if (DemandedBits.getActiveBits() <= ExVTBits) return V.getOperand(0); break; @@ -2143,9 +2226,28 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use /// this predicate to simplify operations downstream. Mask is known to be zero /// for bits that V cannot have. -bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, +bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, + unsigned Depth) const { + EVT VT = V.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return MaskedValueIsZero(V, Mask, DemandedElts, Depth); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in +/// DemandedElts. We use this predicate to simplify operations downstream. +/// Mask is known to be zero for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, + const APInt &DemandedElts, unsigned Depth) const { - return Mask.isSubsetOf(computeKnownBits(Op, Depth).Zero); + return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero); +} + +/// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'. +bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, + unsigned Depth) const { + return Mask.isSubsetOf(computeKnownBits(V, Depth).One); } /// isSplatValue - Return true if the vector V has the same value @@ -2244,28 +2346,50 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) { (AllowUndefs || !UndefElts); } -/// Helper function that checks to see if a node is a constant or a -/// build vector of splat constants at least within the demanded elts. 
-static ConstantSDNode *isConstOrDemandedConstSplat(SDValue N, - const APInt &DemandedElts) { - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) - return CN; - if (N.getOpcode() != ISD::BUILD_VECTOR) - return nullptr; - EVT VT = N.getValueType(); - ConstantSDNode *Cst = nullptr; - unsigned NumElts = VT.getVectorNumElements(); - assert(DemandedElts.getBitWidth() == NumElts && "Unexpected vector size"); - for (unsigned i = 0; i != NumElts; ++i) { - if (!DemandedElts[i]) - continue; - ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(i)); - if (!C || (Cst && Cst->getAPIntValue() != C->getAPIntValue()) || - C->getValueType(0) != VT.getScalarType()) - return nullptr; - Cst = C; +SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { + V = peekThroughExtractSubvectors(V); + + EVT VT = V.getValueType(); + unsigned Opcode = V.getOpcode(); + switch (Opcode) { + default: { + APInt UndefElts; + APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + if (isSplatValue(V, DemandedElts, UndefElts)) { + // Handle case where all demanded elements are UNDEF. + if (DemandedElts.isSubsetOf(UndefElts)) { + SplatIdx = 0; + return getUNDEF(VT); + } + SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); + return V; + } + break; + } + case ISD::VECTOR_SHUFFLE: { + // Check if this is a shuffle node doing a splat. + // TODO - remove this and rely purely on SelectionDAG::isSplatValue, + // getTargetVShiftNode currently struggles without the splat source. + auto *SVN = cast<ShuffleVectorSDNode>(V); + if (!SVN->isSplat()) + break; + int Idx = SVN->getSplatIndex(); + int NumElts = V.getValueType().getVectorNumElements(); + SplatIdx = Idx % NumElts; + return V.getOperand(Idx / NumElts); } - return Cst; + } + + return SDValue(); +} + +SDValue SelectionDAG::getSplatValue(SDValue V) { + int SplatIdx; + if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) + return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), + SrcVector.getValueType().getScalarType(), SrcVector, + getIntPtrConstant(SplatIdx, SDLoc(V))); + return SDValue(); } /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that @@ -2708,8 +2832,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; case ISD::FSHL: case ISD::FSHR: - if (ConstantSDNode *C = - isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) { + if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) { unsigned Amt = C->getAPIntValue().urem(BitWidth); // For fshl, 0-shift returns the 1st arg. @@ -2801,8 +2924,59 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::LOAD: { LoadSDNode *LD = cast<LoadSDNode>(Op); - // If this is a ZEXTLoad and we are looking at the loaded value. - if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { + const Constant *Cst = TLI->getTargetConstantFromLoad(LD); + if (ISD::isNON_EXTLoad(LD) && Cst) { + // Determine any common known bits from the loaded constant pool value. + Type *CstTy = Cst->getType(); + if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) { + // If its a vector splat, then we can (quickly) reuse the scalar path. + // NOTE: We assume all elements match and none are UNDEF. + if (CstTy->isVectorTy()) { + if (const Constant *Splat = Cst->getSplatValue()) { + Cst = Splat; + CstTy = Cst->getType(); + } + } + // TODO - do we need to handle different bitwidths? 
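+ // E.g. for a loaded <4 x i32> constant <1, 3, 1, 3>, the loop below ends + // with Known.One = 0x1 and Known.Zero = ~0x3: bit 0 is set in every demanded + // element, and bits 2 and above are clear in every demanded element.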
+ if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) { + // Iterate across all vector elements finding common known bits. + Known.One.setAllBits(); + Known.Zero.setAllBits(); + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; + if (Constant *Elt = Cst->getAggregateElement(i)) { + if (auto *CInt = dyn_cast<ConstantInt>(Elt)) { + const APInt &Value = CInt->getValue(); + Known.One &= Value; + Known.Zero &= ~Value; + continue; + } + if (auto *CFP = dyn_cast<ConstantFP>(Elt)) { + APInt Value = CFP->getValueAPF().bitcastToAPInt(); + Known.One &= Value; + Known.Zero &= ~Value; + continue; + } + } + Known.One.clearAllBits(); + Known.Zero.clearAllBits(); + break; + } + } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) { + if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { + const APInt &Value = CInt->getValue(); + Known.One = Value; + Known.Zero = ~Value; + } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { + APInt Value = CFP->getValueAPF().bitcastToAPInt(); + Known.One = Value; + Known.Zero = ~Value; + } + } + } + } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { + // If this is a ZEXTLoad and we are looking at the loaded value. EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); Known.Zero.setBitsFrom(MemBits); @@ -2816,15 +2990,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, EVT InVT = Op.getOperand(0).getValueType(); APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); - Known = Known.zext(BitWidth); - Known.Zero.setBitsFrom(InVT.getScalarSizeInBits()); + Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); break; } case ISD::ZERO_EXTEND: { - EVT InVT = Op.getOperand(0).getValueType(); Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known = Known.zext(BitWidth); - Known.Zero.setBitsFrom(InVT.getScalarSizeInBits()); + Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); break; } case ISD::SIGN_EXTEND_VECTOR_INREG: { @@ -2845,7 +3016,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::ANY_EXTEND: { Known = computeKnownBits(Op.getOperand(0), Depth+1); - Known = Known.zext(BitWidth); + Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */); break; } case ISD::TRUNCATE: { @@ -2878,39 +3049,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, LLVM_FALLTHROUGH; case ISD::SUB: case ISD::SUBC: { - if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) { - // We know that the top bits of C-X are clear if X contains less bits - // than C (i.e. no wrap-around can happen). For example, 20-X is - // positive if we can prove that X is >= 0 and < 16. - if (CLHS->getAPIntValue().isNonNegative()) { - unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); - // NLZ can't be BitWidth with no sign bit - APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, - Depth + 1); - - // If all of the MaskV bits are known to be zero, then we know the - // output top bits are zero, because we now know that the output is - // from [0-C]. - if ((Known2.Zero & MaskV) == MaskV) { - unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); - // Top bits known zero. 
- Known.Zero.setHighBits(NLZ2); - } - } - - // If low bits are known to be zero in both operands, then we know they are - // going to be 0 in the result. Both addition and complement operations - // preserve the low zero bits. - Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - unsigned KnownZeroLow = Known2.countMinTrailingZeros(); - if (KnownZeroLow == 0) - break; - + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); - Known.Zero.setLowBits(KnownZeroLow); + Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false, + Known, Known2); break; } case ISD::UADDO: @@ -2928,34 +3070,26 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::ADD: case ISD::ADDC: case ISD::ADDE: { - // Output known-0 bits are known if clear or set in both the low clear bits - // common to both LHS & RHS. For example, 8+(X<<3) is known to have the - // low 3 bits clear. - // Output known-0 bits are also known if the top bits of each input are - // known to be clear. For example, if one input has the top 10 bits clear - // and the other has the top 8 bits clear, we know the top 7 bits of the - // output must be clear. - Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - unsigned KnownZeroHigh = Known2.countMinLeadingZeros(); - unsigned KnownZeroLow = Known2.countMinTrailingZeros(); + assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here."); + + // With ADDE and ADDCARRY, a carry bit may be added in. + KnownBits Carry(1); + if (Opcode == ISD::ADDE) + // Can't track carry from glue, set carry to unknown. + Carry.resetAll(); + else if (Opcode == ISD::ADDCARRY) + // TODO: Compute known bits for the carry operand. Not sure if it is worth + // the trouble (how often will we find a known carry bit). And I haven't + // tested this very much yet, but something like this might work: + // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); + // Carry = Carry.zextOrTrunc(1, false); + Carry.resetAll(); + else + Carry.setAllZero(); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros()); - KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); - - if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) { - // With ADDE and ADDCARRY, a carry bit may be added in, so we can only - // use this information if we know (at least) that the low two bits are - // clear. We then return to the caller that the low bit is unknown but - // that other bits are known zero.
- if (KnownZeroLow >= 2) - Known.Zero.setBits(1, KnownZeroLow); - break; - } - - Known.Zero.setLowBits(KnownZeroLow); - if (KnownZeroHigh > 1) - Known.Zero.setHighBits(KnownZeroHigh - 1); + Known = KnownBits::computeForAddCarry(Known, Known2, Carry); break; } case ISD::SREM: @@ -3010,21 +3144,20 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::EXTRACT_ELEMENT: { Known = computeKnownBits(Op.getOperand(0), Depth+1); const unsigned Index = Op.getConstantOperandVal(1); - const unsigned BitWidth = Op.getValueSizeInBits(); + const unsigned EltBitWidth = Op.getValueSizeInBits(); // Remove low part of known bits mask - Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth); - Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth); + Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth); + Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth); // Remove high part of known bit mask - Known = Known.trunc(BitWidth); + Known = Known.trunc(EltBitWidth); break; } case ISD::EXTRACT_VECTOR_ELT: { SDValue InVec = Op.getOperand(0); SDValue EltNo = Op.getOperand(1); EVT VecVT = InVec.getValueType(); - const unsigned BitWidth = Op.getValueSizeInBits(); const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); const unsigned NumSrcElts = VecVT.getVectorNumElements(); // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know @@ -3042,7 +3175,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = computeKnownBits(InVec, Depth + 1); } if (BitWidth > EltBitWidth) - Known = Known.zext(BitWidth); + Known = Known.zext(BitWidth, false /* => any extend */); break; } case ISD::INSERT_VECTOR_ELT: { @@ -3146,10 +3279,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // the minimum of the clamp min/max range. bool IsMax = (Opcode == ISD::SMAX); ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; - if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) + if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts))) if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) - CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), - DemandedElts); + CstHigh = + isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts); if (CstLow && CstHigh) { if (!IsMax) std::swap(CstLow, CstHigh); @@ -3430,7 +3563,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); // SRA X, C -> adds C sign bits. if (ConstantSDNode *C = - isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) { + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { APInt ShiftVal = C->getAPIntValue(); ShiftVal += Tmp; Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue(); @@ -3438,7 +3571,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Tmp; case ISD::SHL: if (ConstantSDNode *C = - isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) { + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { // shl destroys sign bits. Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); if (C->getAPIntValue().uge(VTBits) || // Bad shift. @@ -3478,10 +3611,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // the minimum of the clamp min/max range. 
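+ // E.g. i32 smax(smin(X, 127), -128) is clamped to [-128, 127], so at least + // 32 - 8 + 1 = 25 sign bits are known.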
@@ -3478,10 +3611,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     // the minimum of the clamp min/max range.
     bool IsMax = (Opcode == ISD::SMAX);
     ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
-    if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+    if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
       if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
-        CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
-                                              DemandedElts);
+        CstHigh =
+            isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
     if (CstLow && CstHigh) {
       if (!IsMax)
         std::swap(CstLow, CstHigh);
@@ -3621,7 +3754,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     SDValue InVec = Op.getOperand(0);
     SDValue InVal = Op.getOperand(1);
     SDValue EltNo = Op.getOperand(2);
-    unsigned NumElts = InVec.getValueType().getVectorNumElements();

     ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
     if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
@@ -3752,13 +3884,43 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
     unsigned ExtType = LD->getExtensionType();
     switch (ExtType) {
-    default: break;
-    case ISD::SEXTLOAD:    // '17' bits known
-      Tmp = LD->getMemoryVT().getScalarSizeInBits();
-      return VTBits-Tmp+1;
-    case ISD::ZEXTLOAD:    // '16' bits known
-      Tmp = LD->getMemoryVT().getScalarSizeInBits();
-      return VTBits-Tmp;
+    default: break;
+    case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known.
+      Tmp = LD->getMemoryVT().getScalarSizeInBits();
+      return VTBits - Tmp + 1;
+    case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known.
+      Tmp = LD->getMemoryVT().getScalarSizeInBits();
+      return VTBits - Tmp;
+    case ISD::NON_EXTLOAD:
+      if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) {
+        // We only need to handle vectors - computeKnownBits should handle
+        // scalar cases.
+        Type *CstTy = Cst->getType();
+        if (CstTy->isVectorTy() &&
+            (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) {
+          Tmp = VTBits;
+          for (unsigned i = 0; i != NumElts; ++i) {
+            if (!DemandedElts[i])
+              continue;
+            if (Constant *Elt = Cst->getAggregateElement(i)) {
+              if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+                const APInt &Value = CInt->getValue();
+                Tmp = std::min(Tmp, Value.getNumSignBits());
+                continue;
+              }
+              if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+                APInt Value = CFP->getValueAPF().bitcastToAPInt();
+                Tmp = std::min(Tmp, Value.getNumSignBits());
+                continue;
+              }
+            }
+            // Unknown type. Conservatively assume no bits match sign bit.
+            return 1;
+          }
+          return Tmp;
+        }
+      }
+      break;
     }
   }
 }
@@ -3803,8 +3965,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
     return false;

   if (Op.getOpcode() == ISD::OR &&
-      !MaskedValueIsZero(Op.getOperand(0),
-                         cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+      !MaskedValueIsZero(Op.getOperand(0), Op.getConstantOperandAPInt(1)))
     return false;

   return true;
@@ -4013,7 +4174,9 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
   return SDValue();
 }

-static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
+/// Try to simplify vector concatenation to an input value, undef, or build
+/// vector.
+static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
                                   ArrayRef<SDValue> Ops,
                                   SelectionDAG &DAG) {
   assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
@@ -4033,6 +4196,31 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
   if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
     return DAG.getUNDEF(VT);

+  // Scan the operands and look for extract operations from a single source
+  // that correspond to insertion at the same location via this concatenation:
+  // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ...
+  SDValue IdentitySrc;
+  bool IsIdentity = true;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    SDValue Op = Ops[i];
+    unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements();
+    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+        Op.getOperand(0).getValueType() != VT ||
+        (IdentitySrc && Op.getOperand(0) != IdentitySrc) ||
+        !isa<ConstantSDNode>(Op.getOperand(1)) ||
+        Op.getConstantOperandVal(1) != IdentityIndex) {
+      IsIdentity = false;
+      break;
+    }
+    assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) &&
+           "Unexpected identity source vector for concat of extracts");
+    IdentitySrc = Op.getOperand(0);
+  }
+  if (IsIdentity) {
+    assert(IdentitySrc && "Failed to set source vector of extracts");
+    return IdentitySrc;
+  }
+
   // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
   // simplified to one big BUILD_VECTOR.
   // FIXME: Add support for SCALAR_TO_VECTOR as well.
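A minimal stand-alone analog of the identity scan above may make the pattern clearer. Extract and foldConcatOfExtracts below are hypothetical names; the real code additionally requires each extract's source type to equal the concatenated type, which is modeled here by tracking the running element offset.

#include <optional>
#include <vector>

// Piece i of the concat must be extract(Src, i * PieceLen), all from one Src.
struct Extract { int SrcId; unsigned Start; unsigned NumElts; };

std::optional<int> foldConcatOfExtracts(const std::vector<Extract> &Pieces) {
  std::optional<int> Src;
  unsigned Offset = 0;
  for (const Extract &E : Pieces) {
    if (E.Start != Offset || (Src && *Src != E.SrcId))
      return std::nullopt; // not an in-order reassembly of one vector
    Src = E.SrcId;
    Offset += E.NumElts;
  }
  return Src; // concat (extract X, 0), (extract X, n), ... == X
}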
@@ -4288,9 +4476,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     if (Operand.isUndef())
       return getUNDEF(VT);
     break;
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    if (Operand.isUndef())
+      return getUNDEF(VT);
+    break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    // [us]itofp(undef) = 0, because the result value is bounded.
+    if (Operand.isUndef())
+      return getConstantFP(0.0, DL, VT);
+    break;
   case ISD::SIGN_EXTEND:
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid SIGN_EXTEND!");
+    assert(VT.isVector() == Operand.getValueType().isVector() &&
+           "SIGN_EXTEND result type should be vector iff the operand "
+           "type is vector!");
     if (Operand.getValueType() == VT) return Operand;   // noop extension
     assert((!VT.isVector() ||
             VT.getVectorNumElements() ==
@@ -4307,6 +4509,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   case ISD::ZERO_EXTEND:
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid ZERO_EXTEND!");
+    assert(VT.isVector() == Operand.getValueType().isVector() &&
+           "ZERO_EXTEND result type should be vector iff the operand "
+           "type is vector!");
     if (Operand.getValueType() == VT) return Operand;   // noop extension
     assert((!VT.isVector() ||
             VT.getVectorNumElements() ==
@@ -4323,6 +4528,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   case ISD::ANY_EXTEND:
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid ANY_EXTEND!");
+    assert(VT.isVector() == Operand.getValueType().isVector() &&
+           "ANY_EXTEND result type should be vector iff the operand "
+           "type is vector!");
     if (Operand.getValueType() == VT) return Operand;   // noop extension
     assert((!VT.isVector() ||
             VT.getVectorNumElements() ==
@@ -4350,6 +4558,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   case ISD::TRUNCATE:
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid TRUNCATE!");
+    assert(VT.isVector() == Operand.getValueType().isVector() &&
+           "TRUNCATE result type should be vector iff the operand "
+           "type is vector!");
     if (Operand.getValueType() == VT) return Operand;   // noop truncate
     assert((!VT.isVector() ||
             VT.getVectorNumElements() ==
@@ -4429,6 +4640,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       return Operand.getOperand(0);
     break;
   case ISD::FNEG:
+    // Negation of an unknown bag of bits is still completely undefined.
+    if (OpOpcode == ISD::UNDEF)
+      return getUNDEF(VT);
+
     // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
     if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
         OpOpcode == ISD::FSUB)
@@ -4513,13 +4728,13 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
 }

 SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
-                                             EVT VT, const ConstantSDNode *Cst1,
-                                             const ConstantSDNode *Cst2) {
-  if (Cst1->isOpaque() || Cst2->isOpaque())
+                                             EVT VT, const ConstantSDNode *C1,
+                                             const ConstantSDNode *C2) {
+  if (C1->isOpaque() || C2->isOpaque())
     return SDValue();

-  std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(),
-                                            Cst2->getAPIntValue());
+  std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(),
+                                            C2->getAPIntValue());
   if (!Folded.second)
     return SDValue();
   return getConstant(Folded.first, DL, VT);
@@ -4532,16 +4747,16 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
     return SDValue();
   if (!TLI->isOffsetFoldingLegal(GA))
     return SDValue();
-  const ConstantSDNode *Cst2 = dyn_cast<ConstantSDNode>(N2);
-  if (!Cst2)
+  auto *C2 = dyn_cast<ConstantSDNode>(N2);
+  if (!C2)
     return SDValue();
-  int64_t Offset = Cst2->getSExtValue();
+  int64_t Offset = C2->getSExtValue();
   switch (Opcode) {
   case ISD::ADD: break;
   case ISD::SUB: Offset = -uint64_t(Offset); break;
   default: return SDValue();
   }
-  return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT,
+  return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT,
                           GA->getOffset() + uint64_t(Offset));
 }

@@ -4571,21 +4786,20 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
 }

 SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
-                                             EVT VT, SDNode *Cst1,
-                                             SDNode *Cst2) {
+                                             EVT VT, SDNode *N1, SDNode *N2) {
   // If the opcode is a target-specific ISD node, there's nothing we can
   // do here and the operand rules may not line up with the below, so
   // bail early.
   if (Opcode >= ISD::BUILTIN_OP_END)
     return SDValue();

-  if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)}))
+  if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)}))
     return getUNDEF(VT);

   // Handle the case of two scalars.
-  if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
-    if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
-      SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2);
+  if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
+    if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
+      SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2);
       assert((!Folded || !VT.isVector()) &&
              "Can't fold vectors ops with scalar operands");
       return Folded;
@@ -4593,19 +4807,19 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   }

   // fold (add Sym, c) -> Sym+c
-  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1))
-    return FoldSymbolOffset(Opcode, VT, GA, Cst2);
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1))
+    return FoldSymbolOffset(Opcode, VT, GA, N2);
   if (TLI->isCommutativeBinOp(Opcode))
-    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
-      return FoldSymbolOffset(Opcode, VT, GA, Cst1);
+    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
+      return FoldSymbolOffset(Opcode, VT, GA, N1);

   // For vectors, extract each constant element and fold them individually.
   // Either input may be an undef value.
-  auto *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
-  if (!BV1 && !Cst1->isUndef())
+  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+  if (!BV1 && !N1->isUndef())
     return SDValue();
-  auto *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
-  if (!BV2 && !Cst2->isUndef())
+  auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
+  if (!BV2 && !N2->isUndef())
     return SDValue();

   // If both operands are undef, that's handled the same way as scalars.
   if (!BV1 && !BV2)
@@ -4755,6 +4969,64 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
   return V;
 }

+SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
+                                         EVT VT, SDValue N1, SDValue N2) {
+  // TODO: We don't do any constant folding for strict FP opcodes here, but we
+  //       should. That will require dealing with a potentially non-default
+  //       rounding mode, checking the "opStatus" return value from the APFloat
+  //       math calculations, and possibly other variations.
+  auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+  auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+  if (N1CFP && N2CFP) {
+    APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF();
+    switch (Opcode) {
+    case ISD::FADD:
+      C1.add(C2, APFloat::rmNearestTiesToEven);
+      return getConstantFP(C1, DL, VT);
+    case ISD::FSUB:
+      C1.subtract(C2, APFloat::rmNearestTiesToEven);
+      return getConstantFP(C1, DL, VT);
+    case ISD::FMUL:
+      C1.multiply(C2, APFloat::rmNearestTiesToEven);
+      return getConstantFP(C1, DL, VT);
+    case ISD::FDIV:
+      C1.divide(C2, APFloat::rmNearestTiesToEven);
+      return getConstantFP(C1, DL, VT);
+    case ISD::FREM:
+      C1.mod(C2);
+      return getConstantFP(C1, DL, VT);
+    case ISD::FCOPYSIGN:
+      C1.copySign(C2);
+      return getConstantFP(C1, DL, VT);
+    default: break;
+    }
+  }
+  if (N1CFP && Opcode == ISD::FP_ROUND) {
+    APFloat C1 = N1CFP->getValueAPF();    // make copy
+    bool Unused;
+    // This can return overflow, underflow, or inexact; we don't care.
+    // FIXME need to be more flexible about rounding mode.
+    (void) C1.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+                      &Unused);
+    return getConstantFP(C1, DL, VT);
+  }
+
+  switch (Opcode) {
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+    // If both operands are undef, the result is undef. If 1 operand is undef,
+    // the result is NaN. This should match the behavior of the IR optimizer.
+    if (N1.isUndef() && N2.isUndef())
+      return getUNDEF(VT);
+    if (N1.isUndef() || N2.isUndef())
+      return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
+  }
+  return SDValue();
+}
+
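The undef rules at the end of foldConstantFPMath can be modeled in isolation. In this sketch (foldBinFP is a hypothetical name) an empty optional stands in for undef; NaN-for-one-undef matches the comment's reference to the IR optimizer's behavior.

#include <cmath>
#include <optional>

std::optional<double> foldBinFP(std::optional<double> A,
                                std::optional<double> B,
                                double (*Op)(double, double)) {
  if (!A && !B)
    return std::nullopt;  // undef op undef --> undef
  if (!A || !B)
    return std::nan("");  // exactly one undef --> NaN
  return Op(*A, *B);      // two constants --> fold
}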
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                               SDValue N1, SDValue N2, const SDNodeFlags Flags) {
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
@@ -4791,9 +5063,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     break;
   }
   case ISD::CONCAT_VECTORS: {
-    // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
     SDValue Ops[] = {N1, N2};
-    if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
       return V;
     break;
   }
@@ -4847,6 +5118,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
     assert(N1.getValueType() == N2.getValueType() &&
            N1.getValueType() == VT && "Binary operator types must match!");
+    if (SDValue V = simplifyFPBinop(Opcode, N1, N2))
+      return V;
     break;
   case ISD::FCOPYSIGN:   // N1 and result must match.  N1/N2 need not match.
     assert(N1.getValueType() == VT &&
@@ -5100,73 +5373,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
     return SV;

-  // Constant fold FP operations.
-  bool HasFPExceptions = TLI->hasFloatingPointExceptions();
-  if (N1CFP) {
-    if (N2CFP) {
-      APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
-      APFloat::opStatus s;
-      switch (Opcode) {
-      case ISD::FADD:
-        s = V1.add(V2, APFloat::rmNearestTiesToEven);
-        if (!HasFPExceptions || s != APFloat::opInvalidOp)
-          return getConstantFP(V1, DL, VT);
-        break;
-      case ISD::FSUB:
-        s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
-        if (!HasFPExceptions || s!=APFloat::opInvalidOp)
-          return getConstantFP(V1, DL, VT);
-        break;
-      case ISD::FMUL:
-        s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
-        if (!HasFPExceptions || s!=APFloat::opInvalidOp)
-          return getConstantFP(V1, DL, VT);
-        break;
-      case ISD::FDIV:
-        s = V1.divide(V2, APFloat::rmNearestTiesToEven);
-        if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
-                                 s!=APFloat::opDivByZero)) {
-          return getConstantFP(V1, DL, VT);
-        }
-        break;
-      case ISD::FREM :
-        s = V1.mod(V2);
-        if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
-                                 s!=APFloat::opDivByZero)) {
-          return getConstantFP(V1, DL, VT);
-        }
-        break;
-      case ISD::FCOPYSIGN:
-        V1.copySign(V2);
-        return getConstantFP(V1, DL, VT);
-      default: break;
-      }
-    }
-
-    if (Opcode == ISD::FP_ROUND) {
-      APFloat V = N1CFP->getValueAPF();    // make copy
-      bool ignored;
-      // This can return overflow, underflow, or inexact; we don't care.
-      // FIXME need to be more flexible about rounding mode.
-      (void)V.convert(EVTToAPFloatSemantics(VT),
-                      APFloat::rmNearestTiesToEven, &ignored);
-      return getConstantFP(V, DL, VT);
-    }
-  }
-
-  switch (Opcode) {
-  case ISD::FADD:
-  case ISD::FSUB:
-  case ISD::FMUL:
-  case ISD::FDIV:
-  case ISD::FREM:
-    // If both operands are undef, the result is undef. If 1 operand is undef,
-    // the result is NaN. This should match the behavior of the IR optimizer.
-    if (N1.isUndef() && N2.isUndef())
-      return getUNDEF(VT);
-    if (N1.isUndef() || N2.isUndef())
-      return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
-  }
+  if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2))
+    return V;

   // Canonicalize an UNDEF to the RHS, even over a constant.
   if (N1.isUndef()) {
@@ -5261,10 +5469,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       APFloat V1 = N1CFP->getValueAPF();
       const APFloat &V2 = N2CFP->getValueAPF();
       const APFloat &V3 = N3CFP->getValueAPF();
-      APFloat::opStatus s =
-        V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
-      if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
-        return getConstantFP(V1, DL, VT);
+      V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+      return getConstantFP(V1, DL, VT);
     }
     break;
   }
@@ -5276,9 +5482,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     break;
   }
   case ISD::CONCAT_VECTORS: {
-    // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
     SDValue Ops[] = {N1, N2, N3};
-    if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
       return V;
     break;
   }
@@ -5317,6 +5522,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     break;
   }
   case ISD::INSERT_SUBVECTOR: {
+    // Inserting undef into undef is still undef.
+    if (N1.isUndef() && N2.isUndef())
+      return getUNDEF(VT);
     SDValue Index = N3;
     if (VT.isSimple() && N1.getValueType().isSimple()
         && N2.getValueType().isSimple()) {
@@ -5337,6 +5545,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       // Trivial insertion.
       if (VT.getSimpleVT() == N2.getSimpleValueType())
         return N2;
+
+      // If this is an insert of an extracted vector into an undef vector, we
+      // can just use the input to the extract.
+      if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+          N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
+        return N2.getOperand(0);
     }
     break;
   }
@@ -5521,116 +5735,12 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
                              SrcDelta + G->getOffset());
 }

-/// Determines the optimal series of memory ops to replace the memset / memcpy.
-/// Return true if the number of memory ops is below the threshold (Limit).
-/// It returns the types of the sequence of memory ops to perform
-/// memset / memcpy by reference.
-static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
-                                     unsigned Limit, uint64_t Size,
-                                     unsigned DstAlign, unsigned SrcAlign,
-                                     bool IsMemset,
-                                     bool ZeroMemset,
-                                     bool MemcpyStrSrc,
-                                     bool AllowOverlap,
-                                     unsigned DstAS, unsigned SrcAS,
-                                     SelectionDAG &DAG,
-                                     const TargetLowering &TLI) {
-  assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
-         "Expecting memcpy / memset source to meet alignment requirement!");
-  // If 'SrcAlign' is zero, that means the memory operation does not need to
-  // load the value, i.e. memset or memcpy from constant string. Otherwise,
-  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
-  // is the specified alignment of the memory operation. If it is zero, that
-  // means it's possible to change the alignment of the destination.
-  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
-  // not need to be loaded.
-  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
-                                   IsMemset, ZeroMemset, MemcpyStrSrc,
-                                   DAG.getMachineFunction());
-
-  if (VT == MVT::Other) {
-    // Use the largest integer type whose alignment constraints are satisfied.
-    // We only need to check DstAlign here as SrcAlign is always greater or
-    // equal to DstAlign (or zero).
-    VT = MVT::i64;
-    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
-           !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
-      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
-    assert(VT.isInteger());
-
-    // Find the largest legal integer type.
-    MVT LVT = MVT::i64;
-    while (!TLI.isTypeLegal(LVT))
-      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
-    assert(LVT.isInteger());
-
-    // If the type we've chosen is larger than the largest legal integer type
-    // then use that instead.
-    if (VT.bitsGT(LVT))
-      VT = LVT;
-  }
-
-  unsigned NumMemOps = 0;
-  while (Size != 0) {
-    unsigned VTSize = VT.getSizeInBits() / 8;
-    while (VTSize > Size) {
-      // For now, only use non-vector load / store's for the left-over pieces.
-      EVT NewVT = VT;
-      unsigned NewVTSize;
-
-      bool Found = false;
-      if (VT.isVector() || VT.isFloatingPoint()) {
-        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
-        if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
-            TLI.isSafeMemOpType(NewVT.getSimpleVT()))
-          Found = true;
-        else if (NewVT == MVT::i64 &&
-                 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
-                 TLI.isSafeMemOpType(MVT::f64)) {
-          // i64 is usually not legal on 32-bit targets, but f64 may be.
-          NewVT = MVT::f64;
-          Found = true;
-        }
-      }
-
-      if (!Found) {
-        do {
-          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
-          if (NewVT == MVT::i8)
-            break;
-        } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
-      }
-      NewVTSize = NewVT.getSizeInBits() / 8;
-
-      // If the new VT cannot cover all of the remaining bits, then consider
-      // issuing a (or a pair of) unaligned and overlapping load / store.
-      bool Fast;
-      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
-          TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
-          Fast)
-        VTSize = Size;
-      else {
-        VT = NewVT;
-        VTSize = NewVTSize;
-      }
-    }
-
-    if (++NumMemOps > Limit)
-      return false;
-
-    MemOps.push_back(VT);
-    Size -= VTSize;
-  }
-
-  return true;
-}
-
 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
   // On Darwin, -Os means optimize for size without hurting performance, so
   // only really optimize for size when -Oz (MinSize) is used.
   if (MF.getTarget().getTargetTriple().isOSDarwin())
-    return MF.getFunction().optForMinSize();
-  return MF.getFunction().optForSize();
+    return MF.getFunction().hasMinSize();
+  return MF.getFunction().hasOptSize();
 }

 static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
@@ -5665,6 +5775,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
                                        MachinePointerInfo DstPtrInfo,
                                        MachinePointerInfo SrcPtrInfo) {
   // Turn a memcpy of undef to nop.
+  // FIXME: We need to honor volatile even if Src is undef.
   if (Src.isUndef())
     return Chain;

@@ -5691,13 +5802,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
   bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
   unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);

-  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
-                                (DstAlignCanChange ? 0 : Align),
-                                (isZeroConstant ? 0 : SrcAlign),
-                                false, false, CopyFromConstant, true,
-                                DstPtrInfo.getAddrSpace(),
-                                SrcPtrInfo.getAddrSpace(),
-                                DAG, TLI))
+  if (!TLI.findOptimalMemOpLowering(
+          MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align),
+          (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
+          /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
+          /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
+          SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
     return SDValue();

   if (DstAlignCanChange) {
@@ -5851,6 +5961,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
                                         MachinePointerInfo DstPtrInfo,
                                         MachinePointerInfo SrcPtrInfo) {
   // Turn a memmove of undef to nop.
+  // FIXME: We need to honor volatile even if Src is undef.
   if (Src.isUndef())
     return Chain;

@@ -5871,13 +5982,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
   if (Align > SrcAlign)
     SrcAlign = Align;
   unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
-
-  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
-                                (DstAlignCanChange ? 0 : Align), SrcAlign,
-                                false, false, false, false,
-                                DstPtrInfo.getAddrSpace(),
-                                SrcPtrInfo.getAddrSpace(),
-                                DAG, TLI))
+  // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in
+  // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the
+  // correct code.
+  bool AllowOverlap = false;
+  if (!TLI.findOptimalMemOpLowering(
+          MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign,
+          /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
+          AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+          MF.getFunction().getAttributes()))
     return SDValue();

   if (DstAlignCanChange) {
@@ -5956,6 +6069,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
                                uint64_t Size, unsigned Align, bool isVol,
                                MachinePointerInfo DstPtrInfo) {
   // Turn a memset of undef to nop.
+  // FIXME: We need to honor volatile even if Src is undef.
   if (Src.isUndef())
     return Chain;

@@ -5972,11 +6086,12 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
     DstAlignCanChange = true;
   bool IsZeroVal =
     isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
-  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
-                                Size, (DstAlignCanChange ? 0 : Align), 0,
-                                true, IsZeroVal, false, true,
-                                DstPtrInfo.getAddrSpace(), ~0u,
-                                DAG, TLI))
+  if (!TLI.findOptimalMemOpLowering(
+          MemOps, TLI.getMaxStoresPerMemset(OptSize), Size,
+          (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true,
+          /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
+          /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u,
+          MF.getFunction().getAttributes()))
     return SDValue();

   if (DstAlignCanChange) {
@@ -6097,9 +6212,11 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
   // Emit a library call.
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+  Entry.Ty = Type::getInt8PtrTy(*getContext());
   Entry.Node = Dst; Args.push_back(Entry);
   Entry.Node = Src; Args.push_back(Entry);
+
+  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
   Entry.Node = Size; Args.push_back(Entry);
   // FIXME: pass in SDLoc
   TargetLowering::CallLoweringInfo CLI(*this);
@@ -6199,9 +6316,11 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
   // Emit a library call.
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+  Entry.Ty = Type::getInt8PtrTy(*getContext());
   Entry.Node = Dst; Args.push_back(Entry);
   Entry.Node = Src; Args.push_back(Entry);
+
+  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
   Entry.Node = Size; Args.push_back(Entry);
   // FIXME: pass in SDLoc
   TargetLowering::CallLoweringInfo CLI(*this);
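For reference, the standard C prototypes that the corrected argument lists above now mirror; this is why only the length argument keeps the DataLayout's intptr type while the pointer arguments become plain byte pointers (i8* at the IR level):

#include <cstddef>

extern "C" void *memcpy(void *dst, const void *src, size_t n);
extern "C" void *memmove(void *dst, const void *src, size_t n);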
@@ -6294,16 +6413,15 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
   checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());

   // Emit a library call.
-  Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  Entry.Node = Dst; Entry.Ty = IntPtrTy;
+  Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext());
   Args.push_back(Entry);
   Entry.Node = Src;
   Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
   Args.push_back(Entry);
   Entry.Node = Size;
-  Entry.Ty = IntPtrTy;
+  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
   Args.push_back(Entry);

   // FIXME: pass in SDLoc
@@ -6384,32 +6502,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
   return SDValue(N, 0);
 }

-SDValue SelectionDAG::getAtomicCmpSwap(
-    unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain,
-    SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
-    unsigned Alignment, AtomicOrdering SuccessOrdering,
-    AtomicOrdering FailureOrdering, SyncScope::ID SSID) {
-  assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
-         Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
-  assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
-
-  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
-    Alignment = getEVTAlignment(MemVT);
-
-  MachineFunction &MF = getMachineFunction();
-
-  // FIXME: Volatile isn't really correct; we should keep track of atomic
-  // orderings in the memoperand.
-  auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
-               MachineMemOperand::MOStore;
-  MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
-                            AAMDNodes(), nullptr, SSID, SuccessOrdering,
-                            FailureOrdering);
-
-  return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO);
-}
-
 SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
                                        EVT MemVT, SDVTList VTs, SDValue Chain,
                                        SDValue Ptr, SDValue Cmp, SDValue Swp,
@@ -6424,35 +6516,6 @@ SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,

 SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
                                 SDValue Chain, SDValue Ptr, SDValue Val,
-                                const Value *PtrVal, unsigned Alignment,
-                                AtomicOrdering Ordering,
-                                SyncScope::ID SSID) {
-  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
-    Alignment = getEVTAlignment(MemVT);
-
-  MachineFunction &MF = getMachineFunction();
-  // An atomic store does not load. An atomic load does not store.
-  // (An atomicrmw obviously both loads and stores.)
-  // For now, atomics are considered to be volatile always, and they are
-  // chained as such.
-  // FIXME: Volatile isn't really correct; we should keep track of atomic
-  // orderings in the memoperand.
-  auto Flags = MachineMemOperand::MOVolatile;
-  if (Opcode != ISD::ATOMIC_STORE)
-    Flags |= MachineMemOperand::MOLoad;
-  if (Opcode != ISD::ATOMIC_LOAD)
-    Flags |= MachineMemOperand::MOStore;
-
-  MachineMemOperand *MMO =
-    MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
-                            MemVT.getStoreSize(), Alignment, AAMDNodes(),
-                            nullptr, SSID, Ordering);
-
-  return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
-}
-
-SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
-                                SDValue Chain, SDValue Ptr, SDValue Val,
                                 MachineMemOperand *MMO) {
   assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
           Opcode == ISD::ATOMIC_LOAD_SUB ||
@@ -6465,6 +6528,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
           Opcode == ISD::ATOMIC_LOAD_MAX ||
           Opcode == ISD::ATOMIC_LOAD_UMIN ||
           Opcode == ISD::ATOMIC_LOAD_UMAX ||
+          Opcode == ISD::ATOMIC_LOAD_FADD ||
+          Opcode == ISD::ATOMIC_LOAD_FSUB ||
          Opcode == ISD::ATOMIC_SWAP ||
          Opcode == ISD::ATOMIC_STORE) &&
        "Invalid Atomic Op");
@@ -6502,7 +6567,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
 SDValue SelectionDAG::getMemIntrinsicNode(
     unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
     EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
-    MachineMemOperand::Flags Flags, unsigned Size) {
+    MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) {
   if (Align == 0)  // Ensure that codegen never sees alignment 0
     Align = getEVTAlignment(MemVT);

@@ -6511,7 +6576,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO =
-      MF.getMachineMemOperand(PtrInfo, Flags, Size, Align);
+      MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo);

   return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
 }
@@ -6557,6 +6622,36 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
   return SDValue(N, 0);
 }

+SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
+                                      SDValue Chain, int FrameIndex,
+                                      int64_t Size, int64_t Offset) {
+  const unsigned Opcode = IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END;
+  const auto VTs = getVTList(MVT::Other);
+  SDValue Ops[2] = {
+      Chain,
+      getFrameIndex(FrameIndex,
+                    getTargetLoweringInfo().getFrameIndexTy(getDataLayout()),
+                    true)};
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opcode, VTs, Ops);
+  ID.AddInteger(FrameIndex);
+  ID.AddInteger(Size);
+  ID.AddInteger(Offset);
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+    return SDValue(E, 0);
+
+  LifetimeSDNode *N = newSDNode<LifetimeSDNode>(
+      Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset);
+  createOperands(N, Ops);
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
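A hypothetical caller of the new getLifetimeNode helper might look like the sketch below. Chain, DL and FI are assumed to come from the surrounding lowering code, and the 16-byte size is arbitrary; only the signature itself is taken from the addition above.

// Mark the stack object FI as live; a LIFETIME_END marker would pass
// IsStart = false with the same frame index, size, and offset.
SDValue Start = DAG.getLifetimeNode(/*IsStart=*/true, DL, Chain,
                                    /*FrameIndex=*/FI, /*Size=*/16,
                                    /*Offset=*/0);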
 /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
 /// MachinePointerInfo record from it.  This is particularly useful because the
 /// code generator has many cases where it doesn't bother passing in a
@@ -6875,7 +6970,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
   SDValue Ops[] = { Chain, Ptr, Mask, PassThru };
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
-  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(MemVT.getRawBits());
   ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
       dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
@@ -6901,12 +6996,11 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
                                      bool IsTruncating, bool IsCompressing) {
   assert(Chain.getValueType() == MVT::Other &&
          "Invalid chain type");
-  EVT VT = Val.getValueType();
   SDVTList VTs = getVTList(MVT::Other);
   SDValue Ops[] = { Chain, Val, Ptr, Mask };
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
-  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(MemVT.getRawBits());
   ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
       dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO));
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
@@ -7057,6 +7151,31 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
   return SDValue();
 }

+// TODO: Use fast-math-flags to enable more simplifications.
+SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) {
+  ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true);
+  if (!YC)
+    return SDValue();
+
+  // X + -0.0 --> X
+  if (Opcode == ISD::FADD)
+    if (YC->getValueAPF().isNegZero())
+      return X;
+
+  // X - +0.0 --> X
+  if (Opcode == ISD::FSUB)
+    if (YC->getValueAPF().isPosZero())
+      return X;
+
+  // X * 1.0 --> X
+  // X / 1.0 --> X
+  if (Opcode == ISD::FMUL || Opcode == ISD::FDIV)
+    if (YC->getValueAPF().isExactlyValue(1.0))
+      return X;
+
+  return SDValue();
+}
+
 SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
                                SDValue Ptr, SDValue SV, unsigned Align) {
   SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
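Why simplifyFPBinop folds FADD against -0.0 but FSUB against +0.0: folding X + 0.0 to X would be wrong for X == -0.0, because -0.0 + 0.0 rounds to +0.0 under the default rounding mode. A quick scalar check using only standard C++:

#include <cassert>
#include <cmath>

int main() {
  double NegZero = -0.0;
  assert(std::signbit(NegZero + -0.0));  // X + -0.0 == X, sign preserved
  assert(!std::signbit(NegZero + 0.0));  // X + +0.0 can lose the sign
  assert(std::signbit(NegZero - 0.0));   // X - +0.0 == X, sign preserved
}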
@@ -7098,8 +7217,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       return V;
     break;
   case ISD::CONCAT_VECTORS:
-    // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
-    if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
       return V;
     break;
   case ISD::SELECT_CC:
@@ -7629,56 +7747,50 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
 SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
   unsigned OrigOpc = Node->getOpcode();
   unsigned NewOpc;
-  bool IsUnary = false;
-  bool IsTernary = false;

   switch (OrigOpc) {
   default:
     llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
-  case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
-  case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
-  case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
-  case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
-  case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
-  case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
-  case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
-  case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
-  case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
-  case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break;
-  case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break;
-  case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break;
-  case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break;
-  case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break;
-  case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break;
-  case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break;
-  case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break;
-  case ISD::STRICT_FNEARBYINT:
-    NewOpc = ISD::FNEARBYINT;
-    IsUnary = true;
-    break;
-  case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
-  case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
-  case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; IsUnary = true; break;
-  case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
-  case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
-  case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
-  }
+  case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
+  case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
+  case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
+  case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
+  case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+  case ISD::STRICT_FMA: NewOpc = ISD::FMA; break;
+  case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; break;
+  case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
+  case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
+  case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; break;
+  case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; break;
+  case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; break;
+  case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; break;
+  case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break;
+  case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break;
+  case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break;
+  case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break;
+  case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break;
+  case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
+  case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
+  case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break;
+  case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break;
+  case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break;
+  case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break;
+  case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
+  case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break;
+  }
+
+  assert(Node->getNumValues() == 2 && "Unexpected number of results!");

   // We're taking this node out of the chain, so we need to re-link things.
   SDValue InputChain = Node->getOperand(0);
   SDValue OutputChain = SDValue(Node, 1);
   ReplaceAllUsesOfValueWith(OutputChain, InputChain);

-  SDVTList VTs = getVTList(Node->getOperand(1).getValueType());
-  SDNode *Res = nullptr;
-  if (IsUnary)
-    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
-  else if (IsTernary)
-    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
-                                           Node->getOperand(2),
-                                           Node->getOperand(3)});
-  else
-    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
-                                           Node->getOperand(2) });
+  SmallVector<SDValue, 3> Ops;
+  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
+    Ops.push_back(Node->getOperand(i));
+
+  SDVTList VTs = getVTList(Node->getValueType(0));
+  SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops);

   // MorphNodeTo can operate in two ways: if an existing node with the
   // specified operands exists, it can just return it.  Otherwise, it
@@ -7980,9 +8092,8 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
       // DIExpression, we need to mark the expression with a
       // DW_OP_stack_value.
       auto *DIExpr = DV->getExpression();
-      DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset,
-                                     DIExpression::NoDeref,
-                                     DIExpression::WithStackValue);
+      DIExpr =
+          DIExpression::prepend(DIExpr, DIExpression::StackValue, Offset);
       SDDbgValue *Clone = getDbgValue(DV->getVariable(), DIExpr, N0.getNode(),
                                       N0.getResNo(), DV->isIndirect(),
                                       DV->getDebugLoc(), DV->getOrder());
@@ -8288,19 +8399,17 @@ void SelectionDAG::updateDivergence(SDNode * N)
   }
 }

-
-void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
   DenseMap<SDNode *, unsigned> Degree;
   Order.reserve(AllNodes.size());
-  for (auto & N : allnodes()) {
+  for (auto &N : allnodes()) {
     unsigned NOps = N.getNumOperands();
     Degree[&N] = NOps;
     if (0 == NOps)
       Order.push_back(&N);
   }
-  for (std::vector<SDNode *>::iterator I = Order.begin();
-       I!=Order.end();++I) {
-    SDNode * N = *I;
+  for (size_t I = 0; I != Order.size(); ++I) {
+    SDNode *N = Order[I];
     for (auto U : N->uses()) {
       unsigned &UnsortedOps = Degree[U];
       if (0 == --UnsortedOps)
@@ -8310,9 +8419,8 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
 }

 #ifndef NDEBUG
-void SelectionDAG::VerifyDAGDiverence()
-{
-  std::vector<SDNode*> TopoOrder;
+void SelectionDAG::VerifyDAGDiverence() {
+  std::vector<SDNode *> TopoOrder;
   CreateTopologicalOrder(TopoOrder);
   const TargetLowering &TLI = getTargetLoweringInfo();
   DenseMap<const SDNode *, bool> DivergenceMap;
@@ -8338,7 +8446,6 @@ void SelectionDAG::VerifyDAGDiverence()
 }
 #endif

-
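The rewritten CreateTopologicalOrder above is a standard worklist topological sort (Kahn's algorithm); indexing instead of iterators matters because Order grows while it is being walked. A generic sketch of the same shape (topoOrder and UsersOf are illustrative names):

#include <cstddef>
#include <vector>

std::vector<int> topoOrder(const std::vector<std::vector<int>> &UsersOf,
                           std::vector<unsigned> Degree) {
  std::vector<int> Order;
  for (int N = 0, E = (int)Degree.size(); N != E; ++N)
    if (Degree[N] == 0)
      Order.push_back(N);  // seed with operand-less nodes
  // Order.size() grows inside the loop, exactly as in the DAG version;
  // an iterator would be invalidated by the push_back below.
  for (size_t I = 0; I != Order.size(); ++I)
    for (int U : UsersOf[Order[I]])
      if (--Degree[U] == 0)
        Order.push_back(U);
  return Order;
}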
 /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
 /// uses of other values produced by From.getNode() alone.  The same value
 /// may appear in both the From and To list.  The Deleted vector is
@@ -8584,14 +8691,24 @@ SDValue llvm::peekThroughOneUseBitcasts(SDValue V) {
   return V;
 }

-bool llvm::isBitwiseNot(SDValue V) {
+SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
+  while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+    V = V.getOperand(0);
+  return V;
+}
+
+bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
   if (V.getOpcode() != ISD::XOR)
     return false;
-  ConstantSDNode *C = isConstOrConstSplat(peekThroughBitcasts(V.getOperand(1)));
-  return C && C->isAllOnesValue();
+  V = peekThroughBitcasts(V.getOperand(1));
+  unsigned NumBits = V.getScalarValueSizeInBits();
+  ConstantSDNode *C =
+      isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true);
+  return C && (C->getAPIntValue().countTrailingOnes() >= NumBits);
 }

-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
+                                          bool AllowTruncation) {
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
     return CN;

@@ -8599,10 +8716,39 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
     BitVector UndefElements;
     ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);

-    // BuildVectors can truncate their operands. Ignore that case here.
-    if (CN && (UndefElements.none() || AllowUndefs) &&
-        CN->getValueType(0) == N.getValueType().getScalarType())
-      return CN;
+    // BuildVectors can truncate their operands. Ignore that case here unless
+    // AllowTruncation is set.
+    if (CN && (UndefElements.none() || AllowUndefs)) {
+      EVT CVT = CN->getValueType(0);
+      EVT NSVT = N.getValueType().getScalarType();
+      assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+      if (AllowTruncation || (CVT == NSVT))
+        return CN;
+    }
+  }
+
+  return nullptr;
+}
+
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
+                                          bool AllowUndefs,
+                                          bool AllowTruncation) {
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+    return CN;
+
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+    BitVector UndefElements;
+    ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements);
+
+    // BuildVectors can truncate their operands. Ignore that case here unless
+    // AllowTruncation is set.
+    if (CN && (UndefElements.none() || AllowUndefs)) {
+      EVT CVT = CN->getValueType(0);
+      EVT NSVT = N.getValueType().getScalarType();
+      assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+      if (AllowTruncation || (CVT == NSVT))
+        return CN;
+    }
   }

   return nullptr;
@@ -8622,9 +8768,26 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
   return nullptr;
 }

-bool llvm::isNullOrNullSplat(SDValue N) {
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N,
+                                              const APInt &DemandedElts,
+                                              bool AllowUndefs) {
+  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+    return CN;
+
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+    BitVector UndefElements;
+    ConstantFPSDNode *CN =
+        BV->getConstantFPSplatNode(DemandedElts, &UndefElements);
+    if (CN && (UndefElements.none() || AllowUndefs))
+      return CN;
+  }
+
+  return nullptr;
+}
+
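The relaxed all-ones test in the isBitwiseNot change above tolerates constants that were truncated from a wider build-vector operand: it only demands that the low NumBits of the XOR mask be ones. A scalar model of that predicate (isAllOnesLow is a hypothetical name):

#include <cstdint>

bool isAllOnesLow(uint64_t C, unsigned NumBits) {
  // Equivalent to countTrailingOnes(C) >= NumBits for NumBits <= 64.
  uint64_t Mask = NumBits >= 64 ? ~0ull : ((1ull << NumBits) - 1);
  return (C & Mask) == Mask;
}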
+bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
   // TODO: may want to use peekThroughBitcast() here.
-  ConstantSDNode *C = isConstOrConstSplat(N);
+  ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
   return C && C->isNullValue();
 }

@@ -8773,17 +8936,12 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {

 /// isOperand - Return true if this node is an operand of N.
 bool SDValue::isOperandOf(const SDNode *N) const {
-  for (const SDValue &Op : N->op_values())
-    if (*this == Op)
-      return true;
-  return false;
+  return any_of(N->op_values(), [this](SDValue Op) { return *this == Op; });
 }

 bool SDNode::isOperandOf(const SDNode *N) const {
-  for (const SDValue &Op : N->op_values())
-    if (this == Op.getNode())
-      return true;
-  return false;
+  return any_of(N->op_values(),
+                [this](SDValue Op) { return this == Op.getNode(); });
 }

 /// reachesChainWithoutSideEffects - Return true if this operand (which must
@@ -8973,6 +9131,56 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
   return getBuildVector(VecVT, dl, Scalars);
 }

+std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp(
+    SDNode *N, unsigned ResNE) {
+  unsigned Opcode = N->getOpcode();
+  assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO ||
+          Opcode == ISD::USUBO || Opcode == ISD::SSUBO ||
+          Opcode == ISD::UMULO || Opcode == ISD::SMULO) &&
+         "Expected an overflow opcode");
+
+  EVT ResVT = N->getValueType(0);
+  EVT OvVT = N->getValueType(1);
+  EVT ResEltVT = ResVT.getVectorElementType();
+  EVT OvEltVT = OvVT.getVectorElementType();
+  SDLoc dl(N);
+
+  // If ResNE is 0, fully unroll the vector op.
+  unsigned NE = ResVT.getVectorNumElements();
+  if (ResNE == 0)
+    ResNE = NE;
+  else if (NE > ResNE)
+    NE = ResNE;
+
+  SmallVector<SDValue, 8> LHSScalars;
+  SmallVector<SDValue, 8> RHSScalars;
+  ExtractVectorElements(N->getOperand(0), LHSScalars, 0, NE);
+  ExtractVectorElements(N->getOperand(1), RHSScalars, 0, NE);
+
+  EVT SVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), ResEltVT);
+  SDVTList VTs = getVTList(ResEltVT, SVT);
+  SmallVector<SDValue, 8> ResScalars;
+  SmallVector<SDValue, 8> OvScalars;
+  for (unsigned i = 0; i < NE; ++i) {
+    SDValue Res = getNode(Opcode, dl, VTs, LHSScalars[i], RHSScalars[i]);
+    SDValue Ov =
+        getSelect(dl, OvEltVT, Res.getValue(1),
+                  getBoolConstant(true, dl, OvEltVT, ResVT),
+                  getConstant(0, dl, OvEltVT));
+
+    ResScalars.push_back(Res);
+    OvScalars.push_back(Ov);
+  }
+
+  ResScalars.append(ResNE - NE, getUNDEF(ResEltVT));
+  OvScalars.append(ResNE - NE, getUNDEF(OvEltVT));
+
+  EVT NewResVT = EVT::getVectorVT(*getContext(), ResEltVT, ResNE);
+  EVT NewOvVT = EVT::getVectorVT(*getContext(), OvEltVT, ResNE);
+  return std::make_pair(getBuildVector(NewResVT, dl, ResScalars),
+                        getBuildVector(NewOvVT, dl, OvScalars));
+}
+
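Per lane, UnrollVectorOverflowOp performs a scalar overflow op and then widens the boolean overflow bit with a select. A scalar model of one UADDO lane (uaddoLane is an illustrative name; the all-ones encoding assumes a true vector boolean, which is one of the contents getBoolConstant can produce depending on the target):

#include <cstdint>
#include <utility>

std::pair<uint32_t, uint32_t> uaddoLane(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;              // wrapping add, like the ISD node
  uint32_t Ov = Sum < A ? ~0u : 0u;  // select(ov, true-bool, 0)
  return {Sum, Ov};
}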
 bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
                                                   LoadSDNode *Base,
                                                   unsigned Bytes,
@@ -9014,7 +9222,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
   // If this is a direct reference to a stack slot, use information about the
   // stack slot's alignment.
-  int FrameIdx = 1 << 31;
+  int FrameIdx = INT_MIN;
   int64_t FrameOffset = 0;
   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
     FrameIdx = FI->getIndex();
@@ -9025,7 +9233,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
     FrameOffset = Ptr.getConstantOperandVal(1);
   }

-  if (FrameIdx != (1 << 31)) {
+  if (FrameIdx != INT_MIN) {
     const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
     unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
                                     FrameOffset);
@@ -9065,6 +9273,15 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
   return std::make_pair(Lo, Hi);
 }

+/// Widen the vector up to the next power of two using INSERT_SUBVECTOR.
+SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) {
+  EVT VT = N.getValueType();
+  EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
+                                NextPowerOf2(VT.getVectorNumElements()));
+  return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N,
+                 getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
+}
+
 void SelectionDAG::ExtractVectorElements(SDValue Op,
                                          SmallVectorImpl<SDValue> &Args,
                                          unsigned Start, unsigned Count) {
@@ -9158,13 +9375,20 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
   return true;
 }

-SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
+                                         BitVector *UndefElements) const {
   if (UndefElements) {
     UndefElements->clear();
     UndefElements->resize(getNumOperands());
   }
+  assert(getNumOperands() == DemandedElts.getBitWidth() &&
+         "Unexpected vector size");
+  if (!DemandedElts)
+    return SDValue();
   SDValue Splatted;
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    if (!DemandedElts[i])
+      continue;
     SDValue Op = getOperand(i);
     if (Op.isUndef()) {
       if (UndefElements)
@@ -9177,20 +9401,40 @@
   }

   if (!Splatted) {
-    assert(getOperand(0).isUndef() &&
+    unsigned FirstDemandedIdx = DemandedElts.countTrailingZeros();
+    assert(getOperand(FirstDemandedIdx).isUndef() &&
           "Can only have a splat without a constant for all undefs.");
-    return getOperand(0);
+    return getOperand(FirstDemandedIdx);
   }

   return Splatted;
 }

+SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+  APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+  return getSplatValue(DemandedElts, UndefElements);
+}
+
+ConstantSDNode *
+BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts,
+                                        BitVector *UndefElements) const {
+  return dyn_cast_or_null<ConstantSDNode>(
+      getSplatValue(DemandedElts, UndefElements));
+}
+
 ConstantSDNode *
 BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
   return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements));
 }

 ConstantFPSDNode *
+BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts,
+                                          BitVector *UndefElements) const {
+  return dyn_cast_or_null<ConstantFPSDNode>(
+      getSplatValue(DemandedElts, UndefElements));
+}
+
+ConstantFPSDNode *
 BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
   return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
 }
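The INT_MIN sentinel change above is not only cosmetic: for a 32-bit int, 1 << 31 shifts a one into the sign bit, which is undefined behaviour in the C++ standards LLVM builds against (it only became well-defined in C++20). INT_MIN states the intended "impossible frame index" sentinel without UB:

#include <climits>

// INT_MIN is the portable spelling of the same sentinel on two's-complement
// targets; 1 << 31 would be UB for 32-bit int before C++20.
static_assert(INT_MIN == -INT_MAX - 1, "two's complement assumed");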
@@ -9228,7 +9472,10 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
   for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
     /* search */;

-  assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+  // If all elements are undefined, this shuffle can be considered a splat
+  // (although it should eventually get simplified away completely).
+  if (i == e)
+    return true;

   // Make sure all remaining elements are either undef or the same as the first
   // non-undef value.
@@ -9266,8 +9513,7 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {

 void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
   assert(!Node->OperandList && "Node already has operands");
-  assert(std::numeric_limits<decltype(SDNode::NumOperands)>::max() >=
-             Vals.size() &&
+  assert(SDNode::getMaxNumOperands() >= Vals.size() &&
          "too many operands to fit into SDNode");
   SDUse *Ops = OperandRecycler.allocate(
       ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
@@ -9287,6 +9533,19 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
   checkForCycles(Node);
 }

+SDValue SelectionDAG::getTokenFactor(const SDLoc &DL,
+                                     SmallVectorImpl<SDValue> &Vals) {
+  size_t Limit = SDNode::getMaxNumOperands();
+  while (Vals.size() > Limit) {
+    unsigned SliceIdx = Vals.size() - Limit;
+    auto ExtractedTFs = ArrayRef<SDValue>(Vals).slice(SliceIdx, Limit);
+    SDValue NewTF = getNode(ISD::TokenFactor, DL, MVT::Other, ExtractedTFs);
+    Vals.erase(Vals.begin() + SliceIdx, Vals.end());
+    Vals.emplace_back(NewTF);
+  }
+  return getNode(ISD::TokenFactor, DL, MVT::Other, Vals);
+}
+
 #ifndef NDEBUG
 static void checkForCyclesHelper(const SDNode *N,
                                  SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 488bac1a9a80..9592bc30a4e1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -1,9 +1,8 @@
 //==- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

@@ -25,8 +24,10 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
   // Conservatively fail if we a match failed..
   if (!Base.getNode() || !Other.Base.getNode())
     return false;
+  if (!hasValidOffset() || !Other.hasValidOffset())
+    return false;
   // Initial Offset difference.
-  Off = Other.Offset - Offset;
+  Off = *Other.Offset - *Offset;

   if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) {
     // Trivial match.
@@ -60,24 +61,110 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
         const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();

-        // Match non-equal FrameIndexes - If both frame indices are fixed
-        // we know their relative offsets and can compare them. Otherwise
-        // we must be conservative.
+        // Match FrameIndexes.
         if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
-          if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base))
+          if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base)) {
+            // Equal FrameIndexes - offsets are directly comparable.
+            if (A->getIndex() == B->getIndex())
+              return true;
+            // Non-equal FrameIndexes - if both frame indices are fixed
+            // we know their relative offsets and can compare them. Otherwise
+            // we must be conservative.
             if (MFI.isFixedObjectIndex(A->getIndex()) &&
                 MFI.isFixedObjectIndex(B->getIndex())) {
               Off += MFI.getObjectOffset(B->getIndex()) -
                      MFI.getObjectOffset(A->getIndex());
               return true;
             }
+          }
       }
   }
   return false;
 }

+bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
+                                      const Optional<int64_t> NumBytes0,
+                                      const SDNode *Op1,
+                                      const Optional<int64_t> NumBytes1,
+                                      const SelectionDAG &DAG, bool &IsAlias) {
+
+  BaseIndexOffset BasePtr0 = match(Op0, DAG);
+  BaseIndexOffset BasePtr1 = match(Op1, DAG);
+
+  if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()))
+    return false;
+  int64_t PtrDiff;
+  if (NumBytes0.hasValue() && NumBytes1.hasValue() &&
+      BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
+    // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
+    // following situations arise:
+    IsAlias = !(
+        // [----BasePtr0----]
+        //                         [---BasePtr1--]
+        // ========PtrDiff========>
+        (*NumBytes0 <= PtrDiff) ||
+        //                     [----BasePtr0----]
+        // [---BasePtr1--]
+        // =====(-PtrDiff)====>
+        (PtrDiff + *NumBytes1 <= 0)); // i.e. *NumBytes1 < -PtrDiff.
+    return true;
+  }
+  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+  // able to calculate their relative offset if at least one arises
+  // from an alloca. However, these allocas cannot overlap and we
+  // can infer there is no alias.
+  if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+    if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+      // If the bases are the same frame index but we couldn't find a
+      // constant offset, (indices are different) be conservative.
+      if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+                     !MFI.isFixedObjectIndex(B->getIndex()))) {
+        IsAlias = false;
+        return true;
+      }
+    }
+
+  bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+  bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+  bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+  bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+  bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+  bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+
+  // If the bases are of mismatched types, or have checkable indices, we can
+  // check that they do not alias.
+  if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
+       (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
+      (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
+    IsAlias = false;
+    return true;
+  }
+  return false; // Cannot determine whether the pointers alias.
+}
+
+bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize,
+                               const BaseIndexOffset &Other,
+                               int64_t OtherBitSize, int64_t &BitOffset) const {
+  int64_t Offset;
+  if (!equalBaseIndex(Other, DAG, Offset))
+    return false;
+  if (Offset >= 0) {
+    // Other is after *this:
+    // [-------*this---------]
+    //            [---Other--]
+    // ==Offset==>
+    BitOffset = 8 * Offset;
+    return BitOffset + OtherBitSize <= BitSize;
+  }
+  // Other starts strictly before *this, it cannot be fully contained.
+  //    [-------*this---------]
+  // [--Other--]
+  return false;
+}
+
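The two diagrammed cases in computeAliasing reduce to a standard interval-disjointness check. With PtrDiff the signed distance from BasePtr0 to BasePtr1, the accesses cannot alias iff one ends at or before the other begins (noAlias is an illustrative name):

#include <cstdint>

bool noAlias(int64_t PtrDiff, int64_t NumBytes0, int64_t NumBytes1) {
  return NumBytes0 <= PtrDiff        // [0, N0) ends before [PtrDiff, ...)
      || PtrDiff + NumBytes1 <= 0;   // [PtrDiff, PtrDiff+N1) ends before 0
}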
@@ -178,3 +265,33 @@ BaseIndexOffset BaseIndexOffset::match(const LSBaseSDNode *N, } return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt); } + +BaseIndexOffset BaseIndexOffset::match(const SDNode *N, + const SelectionDAG &DAG) { + if (const auto *LS0 = dyn_cast<LSBaseSDNode>(N)) + return matchLSNode(LS0, DAG); + if (const auto *LN = dyn_cast<LifetimeSDNode>(N)) { + if (LN->hasOffset()) + return BaseIndexOffset(LN->getOperand(1), SDValue(), LN->getOffset(), + false); + return BaseIndexOffset(LN->getOperand(1), SDValue(), false); + } + return BaseIndexOffset(); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + +LLVM_DUMP_METHOD void BaseIndexOffset::dump() const { + print(dbgs()); +} + +void BaseIndexOffset::print(raw_ostream& OS) const { + OS << "BaseIndexOffset base=["; + Base->print(OS); + OS << "] index=["; + if (Index) + Index->print(OS); + OS << "] offset=" << Offset; +} + +#endif diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 871ab9b29881..e818dd27c05e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1,9 +1,8 @@ //===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -55,6 +54,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -109,6 +109,7 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -123,6 +124,7 @@ using namespace llvm; using namespace PatternMatch; +using namespace SwitchCG; #define DEBUG_TYPE "isel" @@ -215,8 +217,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, unsigned ValueBits = ValueVT.getSizeInBits(); // Assemble the power of 2 part. - unsigned RoundParts = NumParts & (NumParts - 1) ? - 1 << Log2_32(NumParts) : NumParts; + unsigned RoundParts = + (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts; unsigned RoundBits = PartBits * RoundParts; EVT RoundVT = RoundBits == ValueBits ? ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); @@ -322,7 +324,15 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } - llvm_unreachable("Unknown mismatch!"); + // Handle MMX to a narrower integer type by bitcasting MMX to integer and + // then truncating. 
+  if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
+      ValueVT.bitsLT(PartEVT)) {
+    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
+    return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+  }
+
+  report_fatal_error("Unknown mismatch in getCopyFromParts!");
 }

 static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
@@ -573,7 +583,8 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
     unsigned RoundBits = RoundParts * PartBits;
     unsigned OddParts = NumParts - RoundParts;
     SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
-                                 DAG.getIntPtrConstant(RoundBits, DL));
+      DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
+
     getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
                    CallConv);
@@ -1003,6 +1014,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
   DL = &DAG.getDataLayout();
   Context = DAG.getContext();
   LPadToCallSiteMap.clear();
+  SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
 }

 void SelectionDAGBuilder::clear() {
@@ -1032,19 +1044,7 @@ SDValue SelectionDAGBuilder::getRoot() {
   }

   // Otherwise, we have to make a token factor node.
-  // If we have >= 2^16 loads then split across multiple token factors as
-  // there's a 64k limit on the number of SDNode operands.
-  SDValue Root;
-  size_t Limit = (1 << 16) - 1;
-  while (PendingLoads.size() > Limit) {
-    unsigned SliceIdx = PendingLoads.size() - Limit;
-    auto ExtractedTFs = ArrayRef<SDValue>(PendingLoads).slice(SliceIdx, Limit);
-    SDValue NewTF =
-        DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, ExtractedTFs);
-    PendingLoads.erase(PendingLoads.begin() + SliceIdx, PendingLoads.end());
-    PendingLoads.emplace_back(NewTF);
-  }
-  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingLoads);
+  SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads);
   PendingLoads.clear();
   DAG.setRoot(Root);
   return Root;
@@ -1144,6 +1144,13 @@ void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,

   for (auto &DDIMI : DanglingDebugInfoMap) {
     DanglingDebugInfoVector &DDIV = DDIMI.second;
+
+    // If debug info is to be dropped, run it through final checks to see
+    // whether it can be salvaged.
+    for (auto &DDI : DDIV)
+      if (isMatchingDbgValue(DDI))
+        salvageUnresolvedDbgValue(DDI);
+
     DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
   }
 }
@@ -1169,6 +1176,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
            "Expected inlined-at fields to agree");
     SDDbgValue *SDV;
     if (Val.getNode()) {
+      // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
+      // FuncArgumentDbgValue (it would be hoisted to the function entry, and if
+      // we couldn't resolve it directly when examining the DbgValue intrinsic
+      // in the first place we should not be more successful here). Unless we
+      // have some test case that proves this to be correct, we should avoid
+      // calling EmitFuncArgumentDbgValue here.
       if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
         LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
                           << DbgSDNodeOrder << "] for:\n  " << *DI << "\n");
@@ -1186,12 +1199,173 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
       } else
         LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
                           << "in EmitFuncArgumentDbgValue\n");
-    } else
+    } else {
       LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+      auto Undef =
+          UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+      auto SDV =
+          DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
+      DAG.AddDbgValue(SDV, nullptr, false);
+    }
   }
   DDIV.clear();
 }

+void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
+  Value *V = DDI.getDI()->getValue();
+  DILocalVariable *Var = DDI.getDI()->getVariable();
+  DIExpression *Expr = DDI.getDI()->getExpression();
+  DebugLoc DL = DDI.getdl();
+  DebugLoc InstDL = DDI.getDI()->getDebugLoc();
+  unsigned SDOrder = DDI.getSDNodeOrder();
+
+  // Currently we consider only dbg.value intrinsics -- we tell the salvager
+  // that DW_OP_stack_value is desired.
+  assert(isa<DbgValueInst>(DDI.getDI()));
+  bool StackValue = true;
+
+  // Can this Value be encoded without any further work?
+  if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder))
+    return;
+
+  // Attempt to salvage back through as many instructions as possible. Bail if
+  // a non-instruction is seen, such as a constant expression or global
+  // variable. FIXME: Further work could recover those too.
+  while (isa<Instruction>(V)) {
+    Instruction &VAsInst = *cast<Instruction>(V);
+    DIExpression *NewExpr = salvageDebugInfoImpl(VAsInst, Expr, StackValue);
+
+    // If we cannot salvage any further, and haven't yet found a suitable debug
+    // expression, bail out.
+    if (!NewExpr)
+      break;
+
+    // New value and expr now represent this debuginfo.
+    V = VAsInst.getOperand(0);
+    Expr = NewExpr;
+
+    // Some kind of simplification occurred: check whether the operand of the
+    // salvaged debug expression can be encoded in this DAG.
+    if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) {
+      LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n  "
+                        << DDI.getDI() << "\nBy stripping back to:\n  " << V);
+      return;
+    }
+  }
+
+  // This was the final opportunity to salvage this debug information, and it
+  // couldn't be done. Place an undef DBG_VALUE at this location to terminate
+  // any earlier variable location.
+  auto Undef = UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+  auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
+  DAG.AddDbgValue(SDV, nullptr, false);
+
+  LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n  " << DDI.getDI()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "  Last seen at:\n    " << *DDI.getDI()->getOperand(0)
+                    << "\n");
+}
+
+bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var,
+                                           DIExpression *Expr, DebugLoc dl,
+                                           DebugLoc InstDL, unsigned Order) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDDbgValue *SDV;
+  if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
+      isa<ConstantPointerNull>(V)) {
+    SDV = DAG.getConstantDbgValue(Var, Expr, V, dl, SDNodeOrder);
+    DAG.AddDbgValue(SDV, nullptr, false);
+    return true;
+  }
+
+  // If the Value is a frame index, we can create a FrameIndex debug value
+  // without relying on the DAG at all.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + auto SDV = + DAG.getFrameIndexDbgValue(Var, Expr, SI->second, + /*IsIndirect*/ false, dl, SDNodeOrder); + // Do not attach the SDNodeDbgValue to an SDNode: this variable location + // is still available even if the SDNode gets optimized out. + DAG.AddDbgValue(SDV, nullptr, false); + return true; + } + } + + // Do not use getValue() in here; we don't want to generate code at + // this point if it hasn't been done yet. + SDValue N = NodeMap[V]; + if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map. + N = UnusedArgNodeMap[V]; + if (N.getNode()) { + if (EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N)) + return true; + SDV = getDbgValue(N, Var, Expr, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, N.getNode(), false); + return true; + } + + // Special rules apply for the first dbg.values of parameter variables in a + // function. Identify them by the fact they reference Argument Values, that + // they're parameters, and they are parameters of the current function. We + // need to let them dangle until they get an SDNode. + bool IsParamOfFunc = isa<Argument>(V) && Var->isParameter() && + !InstDL.getInlinedAt(); + if (!IsParamOfFunc) { + // The value is not used in this block yet (or it would have an SDNode). + // We still want the value to appear for the user if possible -- if it has + // an associated VReg, we can refer to that instead. + auto VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) { + unsigned Reg = VMI->second; + // If this is a PHI node, it may be split up into several MI PHI nodes + // (in FunctionLoweringInfo::set). + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, + V->getType(), None); + if (RFV.occupiesMultipleRegs()) { + unsigned Offset = 0; + unsigned BitsToDescribe = 0; + if (auto VarSize = Var->getSizeInBits()) + BitsToDescribe = *VarSize; + if (auto Fragment = Expr->getFragmentInfo()) + BitsToDescribe = Fragment->SizeInBits; + for (auto RegAndSize : RFV.getRegsAndSizes()) { + unsigned RegisterSize = RegAndSize.second; + // Bail out if all bits are described already. + if (Offset >= BitsToDescribe) + break; + unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe) + ? BitsToDescribe - Offset + : RegisterSize; + auto FragmentExpr = DIExpression::createFragmentExpression( + Expr, Offset, FragmentSize); + if (!FragmentExpr) + continue; + SDV = DAG.getVRegDbgValue(Var, *FragmentExpr, RegAndSize.first, + false, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); + Offset += RegisterSize; + } + } else { + SDV = DAG.getVRegDbgValue(Var, Expr, Reg, false, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); + } + return true; + } + } + + return false; +} + +void SelectionDAGBuilder::resolveOrClearDbgInfo() { + // Try to fixup any remaining dangling debug info -- and drop it if we can't. + for (auto &Pair : DanglingDebugInfoMap) + for (auto &DDI : Pair.second) + salvageUnresolvedDbgValue(DDI); + clearDanglingDebugInfo(); +} + /// getCopyFromRegs - If there was virtual register allocated for the value V /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. 
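//===----------------------------------------------------------------------===//
// Aside: the per-register fragment loop above, reduced to a standalone
// helper. A variable spread over several registers is described by one
// DW_OP_LLVM_fragment per register, with the final fragment clipped to the
// bits the variable actually has. The Fragment struct and function name are
// illustrative, not LLVM API.

#include <utility>
#include <vector>

struct Fragment {
  unsigned OffsetInBits; // position of this piece within the variable
  unsigned SizeInBits;   // how many bits this register contributes
  unsigned Reg;          // register holding the piece
};

std::vector<Fragment>
splitIntoFragments(unsigned BitsToDescribe,
                   const std::vector<std::pair<unsigned, unsigned>>
                       &RegsAndSizes /* (reg, size in bits) */) {
  std::vector<Fragment> Out;
  unsigned Offset = 0;
  for (const auto &RegAndSize : RegsAndSizes) {
    // Bail out once all bits of the variable are described.
    if (Offset >= BitsToDescribe)
      break;
    unsigned RegisterSize = RegAndSize.second;
    unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
                                ? BitsToDescribe - Offset
                                : RegisterSize;
    Out.push_back({Offset, FragmentSize, RegAndSize.first});
    Offset += RegisterSize;
  }
  return Out;
}
//===----------------------------------------------------------------------===//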
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
@@ -1469,6 +1643,36 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
   }
 }

+// For wasm, there's always a single catch pad attached to a catchswitch, and
+// the control flow always stops at the single catch pad, as it does for a
+// cleanup pad. In case the exception caught is not of the types the catch pad
+// catches, it will be rethrown by a rethrow.
+static void findWasmUnwindDestinations(
+    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+    BranchProbability Prob,
+    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+        &UnwindDests) {
+  while (EHPadBB) {
+    const Instruction *Pad = EHPadBB->getFirstNonPHI();
+    if (isa<CleanupPadInst>(Pad)) {
+      // Stop on cleanup pads.
+      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+      UnwindDests.back().first->setIsEHScopeEntry();
+      break;
+    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+      // Add the catchpad handlers to the possible destinations. We don't
+      // continue to the unwind destination of the catchswitch for wasm.
+      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+        UnwindDests.back().first->setIsEHScopeEntry();
+      }
+      break;
+    } else {
+      continue;
+    }
+  }
+}
+
 /// When an invoke or a cleanupret unwinds to the next EH pad, there are
 /// many places it could ultimately go. In the IR, we have a single unwind
 /// destination, but in the machine CFG, we enumerate all the possible blocks.
@@ -1489,6 +1693,13 @@ static void findUnwindDestinations(
   bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
   bool IsSEH = isAsynchronousEHPersonality(Personality);

+  if (IsWasmCXX) {
+    findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
+    assert(UnwindDests.size() <= 1 &&
+           "There should be at most one unwind destination for wasm");
+    return;
+  }
+
   while (EHPadBB) {
     const Instruction *Pad = EHPadBB->getFirstNonPHI();
     BasicBlock *NewEHPadBB = nullptr;
@@ -1501,8 +1712,7 @@ static void findUnwindDestinations(
       // personalities.
       UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
       UnwindDests.back().first->setIsEHScopeEntry();
-      if (!IsWasmCXX)
-        UnwindDests.back().first->setIsEHFuncletEntry();
+      UnwindDests.back().first->setIsEHFuncletEntry();
       break;
     } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
       // Add the catchpad handlers to the possible destinations.
@@ -1588,9 +1798,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
                                 DemoteReg, PtrValueVTs[0]);
     SDValue RetOp = getValue(I.getOperand(0));

-    SmallVector<EVT, 4> ValueVTs;
+    SmallVector<EVT, 4> ValueVTs, MemVTs;
     SmallVector<uint64_t, 4> Offsets;
-    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
+                    &Offsets);
     unsigned NumValues = ValueVTs.size();

     SmallVector<SDValue, 4> Chains(NumValues);
@@ -1598,8 +1809,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
       // An aggregate return value cannot wrap around the address space, so
       // offsets to its parts don't wrap either.
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); - Chains[i] = DAG.getStore( - Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), + + SDValue Val = RetOp.getValue(i); + if (MemVTs[i] != ValueVTs[i]) + Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]); + Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val, // FIXME: better loc info would be nice. Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); } @@ -1615,6 +1829,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const Function *F = I.getParent()->getParent(); + bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( + I.getOperand(0)->getType(), F->getCallingConv(), + /*IsVarArg*/ false); + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) @@ -1647,6 +1865,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { if (RetInReg) Flags.setInReg(); + if (I.getOperand(0)->getType()->isPointerTy()) { + Flags.setPointer(); + Flags.setPointerAddrSpace( + cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace()); + } + + if (NeedsRegBlock) { + Flags.setInConsecutiveRegs(); + if (j == NumValues - 1) + Flags.setInConsecutiveRegsLast(); + } + // Propagate extension type if any if (ExtendKind == ISD::SIGN_EXTEND) Flags.setSExt(); @@ -1668,7 +1898,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const Function *F = I.getParent()->getParent(); if (TLI.supportSwiftError() && F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) { - assert(FuncInfo.SwiftErrorArg && "Need a swift error argument"); + assert(SwiftError.getFunctionArg() && "Need a swift error argument"); ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); Flags.setSwiftError(); Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/, @@ -1677,8 +1907,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { 0 /*partOffs*/)); // Create SDNode for the swifterror virtual register. OutVals.push_back( - DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt( - &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first, + DAG.getRegister(SwiftError.getOrCreateVRegUseAt( + &I, FuncInfo.MBB, SwiftError.getFunctionArg()), EVT(TLI.getPointerTy(DL)))); } @@ -1825,7 +2055,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); - SwitchCases.push_back(CB); + SL->SwitchCases.push_back(CB); return; } } @@ -1834,7 +2064,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ; CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()), nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); - SwitchCases.push_back(CB); + SL->SwitchCases.push_back(CB); } void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, @@ -2043,27 +2273,27 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. - assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); + assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); // Allow some cases to be rejected. 
- if (ShouldEmitAsBranches(SwitchCases)) { - for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { - ExportFromCurrentBlock(SwitchCases[i].CmpLHS); - ExportFromCurrentBlock(SwitchCases[i].CmpRHS); + if (ShouldEmitAsBranches(SL->SwitchCases)) { + for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) { + ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS); + ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS); } // Emit the branch for this block. - visitSwitchCase(SwitchCases[0], BrMBB); - SwitchCases.erase(SwitchCases.begin()); + visitSwitchCase(SL->SwitchCases[0], BrMBB); + SL->SwitchCases.erase(SL->SwitchCases.begin()); return; } // Okay, we decided not to do this, remove any inserted MBB's and clear // SwitchCases. - for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) - FuncInfo.MF->erase(SwitchCases[i].ThisBB); + for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) + FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB); - SwitchCases.clear(); + SL->SwitchCases.clear(); } } @@ -2084,6 +2314,20 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, SDValue CondLHS = getValue(CB.CmpLHS); SDLoc dl = CB.DL; + if (CB.CC == ISD::SETTRUE) { + // Branch or fall through to TrueBB. + addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb); + SwitchBB->normalizeSuccProbs(); + if (CB.TrueBB != NextBlock(SwitchBB)) { + DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(), + DAG.getBasicBlock(CB.TrueBB))); + } + return; + } + + auto &TLI = DAG.getTargetLoweringInfo(); + EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType()); + // Build the setcc now. if (!CB.CmpMHS) { // Fold "(X == true)" to X and "(X == false)" to !X to @@ -2095,8 +2339,18 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, CB.CC == ISD::SETEQ) { SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); - } else - Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); + } else { + SDValue CondRHS = getValue(CB.CmpRHS); + + // If a pointer's DAG type is larger than its memory type then the DAG + // values are zero-extended. This breaks signed comparisons so truncate + // back to the underlying type before doing the compare. + if (CondLHS.getValueType() != MemVT) { + CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT); + CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT); + } + Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC); + } } else { assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); @@ -2147,7 +2401,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } /// visitJumpTable - Emit JumpTable node in the current MBB -void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { +void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); @@ -2162,14 +2416,12 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { /// visitJumpTableHeader - This function emits necessary code to produce index /// in the JumpTable from switch case. 
-void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, +void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB) { SDLoc dl = getCurSDLoc(); - // Subtract the lowest switch case value from the value being switched on and - // conditional branch to default mbb if the result is greater than the - // difference between smallest and largest cases. + // Subtract the lowest switch case value from the value being switched on. SDValue SwitchOp = getValue(JTH.SValue); EVT VT = SwitchOp.getValueType(); SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, @@ -2189,24 +2441,33 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; - // Emit the range check for the jump table, and branch to the default block - // for the switch statement if the value being switched on exceeds the largest - // case in the switch. - SDValue CMP = DAG.getSetCC( - dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT); - - SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, CopyTo, CMP, - DAG.getBasicBlock(JT.Default)); - - // Avoid emitting unnecessary branches to the next block. - if (JT.MBB != NextBlock(SwitchBB)) - BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, - DAG.getBasicBlock(JT.MBB)); - - DAG.setRoot(BrCond); + if (!JTH.OmitRangeCheck) { + // Emit the range check for the jump table, and branch to the default block + // for the switch statement if the value being switched on exceeds the + // largest case in the switch. + SDValue CMP = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT); + + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, CopyTo, CMP, + DAG.getBasicBlock(JT.Default)); + + // Avoid emitting unnecessary branches to the next block. + if (JT.MBB != NextBlock(SwitchBB)) + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(JT.MBB)); + + DAG.setRoot(BrCond); + } else { + // Avoid emitting unnecessary branches to the next block. + if (JT.MBB != NextBlock(SwitchBB)) + DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo, + DAG.getBasicBlock(JT.MBB))); + else + DAG.setRoot(CopyTo); + } } /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global @@ -2215,6 +2476,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, SDValue &Chain) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent()); MachineSDNode *Node = @@ -2227,6 +2489,8 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy)); DAG.setNodeMemRefs(Node, {MemRef}); } + if (PtrTy != PtrMemTy) + return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy); return SDValue(Node, 0); } @@ -2242,6 +2506,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // First create the loads to the guard/stack slot for the comparison. 
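//===----------------------------------------------------------------------===//
// Aside: the visitJumpTableHeader lowering above, in scalar form. The value
// is rebased against the smallest case; because the subtraction is unsigned,
// the single SETUGT-style comparison also rejects values below the first
// case. With JTH.OmitRangeCheck the guard is skipped entirely, since the
// input is already known to be in range. Types and names are illustrative.

#include <cstdint>

using Handler = void (*)();

void dispatch(uint32_t Value, uint32_t First, uint32_t Last,
              const Handler *Table, Handler Default, bool OmitRangeCheck) {
  uint32_t Index = Value - First; // wraps for Value < First, failing the check
  if (!OmitRangeCheck && Index > Last - First) {
    Default(); // value outside [First, Last]: branch to the default block
    return;
  }
  Table[Index](); // indirect branch through the jump table
}
//===----------------------------------------------------------------------===//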
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout()); MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo(); int FI = MFI.getStackProtectorIndex(); @@ -2254,7 +2519,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // Generate code to load the content of the guard slot. SDValue GuardVal = DAG.getLoad( - PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, + PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align, MachineMemOperand::MOVolatile); @@ -2262,27 +2527,26 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl); // Retrieve guard check function, nullptr if instrumentation is inlined. - if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) { + if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) { // The target provides a guard check function to validate the guard value. // Generate a call to that function with the content of the guard slot as // argument. - auto *Fn = cast<Function>(GuardCheck); - FunctionType *FnTy = Fn->getFunctionType(); + FunctionType *FnTy = GuardCheckFn->getFunctionType(); assert(FnTy->getNumParams() == 1 && "Invalid function signature"); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = GuardVal; Entry.Ty = FnTy->getParamType(0); - if (Fn->hasAttribute(1, Attribute::AttrKind::InReg)) + if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg)) Entry.IsInReg = true; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) - .setChain(DAG.getEntryNode()) - .setCallee(Fn->getCallingConv(), FnTy->getReturnType(), - getValue(GuardCheck), std::move(Args)); + .setChain(DAG.getEntryNode()) + .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(), + getValue(GuardCheckFn), std::move(Args)); std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); @@ -2298,9 +2562,9 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, const Value *IRGuard = TLI.getSDagStackGuard(M); SDValue GuardPtr = getValue(IRGuard); - Guard = - DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0), - Align, MachineMemOperand::MOVolatile); + Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr, + MachinePointerInfo(IRGuard, 0), Align, + MachineMemOperand::MOVolatile); } // Perform the comparison via a subtract/getsetcc. @@ -2339,6 +2603,12 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, None, false, getCurSDLoc(), false, false).second; + // On PS4, the "return address" must still be within the calling function, + // even if it's at the very end, so emit an explicit TRAP here. + // Passing 'true' for doesNotReturn above won't generate the trap for us. 
+ if (TM.getTargetTriple().isPS4CPU()) + Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain); + DAG.setRoot(Chain); } @@ -2493,6 +2763,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { case Intrinsic::experimental_gc_statepoint: LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); break; + case Intrinsic::wasm_rethrow_in_catch: { + // This is usually done in visitTargetIntrinsic, but this intrinsic is + // special because it can be invoked, so we manually lower it to a DAG + // node here. + SmallVector<SDValue, 8> Ops; + Ops.push_back(getRoot()); // inchain + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Ops.push_back( + DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch, getCurSDLoc(), + TLI.getPointerTy(DAG.getDataLayout()))); + SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain + DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops)); + break; + } } } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) { // Currently we do not lower any intrinsic calls with deopt operand bundles. @@ -2528,6 +2812,35 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { InvokeMBB->normalizeSuccProbs(); // Drop into normal successor. + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), + DAG.getBasicBlock(Return))); +} + +void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { + MachineBasicBlock *CallBrMBB = FuncInfo.MBB; + + // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't + // have to do anything here to lower funclet bundles. + assert(!I.hasOperandBundlesOtherThan( + {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + "Cannot lower callbrs with arbitrary operand bundles yet!"); + + assert(isa<InlineAsm>(I.getCalledValue()) && + "Only know how to handle inlineasm callbr"); + visitInlineAsm(&I); + + // Retrieve successors. + MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()]; + + // Update successor info. + addSuccessorWithProb(CallBrMBB, Return); + for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) { + MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)]; + addSuccessorWithProb(CallBrMBB, Target); + } + CallBrMBB->normalizeSuccProbs(); + + // Drop into default successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Return))); @@ -2585,49 +2898,17 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { setValue(&LP, Res); } -void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { -#ifndef NDEBUG - for (const CaseCluster &CC : Clusters) - assert(CC.Low == CC.High && "Input clusters must be single-case"); -#endif - - llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) { - return a.Low->getValue().slt(b.Low->getValue()); - }); - - // Merge adjacent clusters with the same destination. - const unsigned N = Clusters.size(); - unsigned DstIndex = 0; - for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) { - CaseCluster &CC = Clusters[SrcIndex]; - const ConstantInt *CaseVal = CC.Low; - MachineBasicBlock *Succ = CC.MBB; - - if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ && - (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) { - // If this case has the same successor and is a neighbour, merge it into - // the previous cluster. 
- Clusters[DstIndex - 1].High = CaseVal; - Clusters[DstIndex - 1].Prob += CC.Prob; - } else { - std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], - sizeof(Clusters[SrcIndex])); - } - } - Clusters.resize(DstIndex); -} - void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last) { // Update JTCases. - for (unsigned i = 0, e = JTCases.size(); i != e; ++i) - if (JTCases[i].first.HeaderBB == First) - JTCases[i].first.HeaderBB = Last; + for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i) + if (SL->JTCases[i].first.HeaderBB == First) + SL->JTCases[i].first.HeaderBB = Last; // Update BitTestCases. - for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) - if (BitTestCases[i].Parent == First) - BitTestCases[i].Parent = Last; + for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i) + if (SL->BitTestCases[i].Parent == First) + SL->BitTestCases[i].Parent = Last; } void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { @@ -2916,6 +3197,18 @@ void SelectionDAGBuilder::visitICmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); + auto &TLI = DAG.getTargetLoweringInfo(); + EVT MemVT = + TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); + + // If a pointer's DAG type is larger than its memory type then the DAG values + // are zero-extended. This breaks signed comparisons so truncate back to the + // underlying type before doing the compare. + if (Op1.getValueType() != MemVT) { + Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT); + Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT); + } + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); @@ -2963,6 +3256,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) { ISD::NodeType OpCode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; + bool IsUnaryAbs = false; + // Min/max matching is only viable if all output VTs are the same. if (is_splat(ValueVTs)) { EVT VT = ValueVTs[0]; @@ -3023,10 +3318,16 @@ void SelectionDAGBuilder::visitSelect(const User &I) { break; } break; + case SPF_ABS: + IsUnaryAbs = true; + Opc = ISD::ABS; + break; + case SPF_NABS: + // TODO: we need to produce sub(0, abs(X)). 
default: break; } - if (Opc != ISD::DELETED_NODE && + if (!IsUnaryAbs && Opc != ISD::DELETED_NODE && (TLI.isOperationLegalOrCustom(Opc, VT) || (UseScalarMinMax && TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) && @@ -3039,15 +3340,30 @@ void SelectionDAGBuilder::visitSelect(const User &I) { RHSVal = getValue(RHS); BaseOps = {}; } + + if (IsUnaryAbs) { + OpCode = Opc; + LHSVal = getValue(LHS); + BaseOps = {}; + } } - for (unsigned i = 0; i != NumValues; ++i) { - SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end()); - Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); - Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); - Values[i] = DAG.getNode(OpCode, getCurSDLoc(), - LHSVal.getNode()->getValueType(LHSVal.getResNo()+i), - Ops); + if (IsUnaryAbs) { + for (unsigned i = 0; i != NumValues; ++i) { + Values[i] = + DAG.getNode(OpCode, getCurSDLoc(), + LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), + SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + } + } else { + for (unsigned i = 0; i != NumValues; ++i) { + SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end()); + Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); + Values[i] = DAG.getNode( + OpCode, getCurSDLoc(), + LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops); + } } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), @@ -3135,18 +3451,26 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); + auto &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); - setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); + EVT PtrMemVT = + TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); + N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT); + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT); + setValue(&I, N); } void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), - I.getType()); - setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); + auto &TLI = DAG.getTargetLoweringInfo(); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType()); + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT); + N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT); + setValue(&I, N); } void SelectionDAGBuilder::visitBitCast(const User &I) { @@ -3284,12 +3608,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MOps1[0] = Src1; MOps2[0] = Src2; - Src1 = Src1.isUndef() - ? DAG.getUNDEF(PaddedVT) - : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1); - Src2 = Src2.isUndef() - ? DAG.getUNDEF(PaddedVT) - : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2); + Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1); + Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2); // Readjust mask for new input vector length. 
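//===----------------------------------------------------------------------===//
// Aside: a scalar illustration of the DAG-type vs. memory-type distinction
// that the getPtrExtOrTrunc calls above (visitICmp, visitSwitchCase,
// visitPtrToInt, visitIntToPtr) manage. On targets where pointers are carried
// zero-extended in wider registers (PtrTy wider than PtrMemTy), signed
// comparisons must normalize back to the memory width first. The 32/64-bit
// widths below are an example, not taken from the patch.

#include <cstdint>

// Model: 32-bit pointers (PtrMemTy = i32) carried zero-extended in 64-bit
// registers (PtrTy = i64).
uint64_t ptrExt(uint32_t MemPtr) { return MemPtr; } // zero-extend
uint32_t ptrTrunc(uint64_t RegPtr) { return static_cast<uint32_t>(RegPtr); }

bool signedLess(uint64_t LHS, uint64_t RHS) {
  // Comparing the zero-extended i64 values directly would treat 0x80000000
  // as a large positive number; truncating back to the memory type first
  // puts the sign bit where the signed comparison expects it.
  return static_cast<int32_t>(ptrTrunc(LHS)) <
         static_cast<int32_t>(ptrTrunc(RHS));
}
//===----------------------------------------------------------------------===//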
SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1); @@ -3498,6 +3818,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace(); SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); + auto &TLI = DAG.getTargetLoweringInfo(); + MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS); + MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS); // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. @@ -3555,6 +3878,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds()) Flags.setNoUnsignedWrap(true); + OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType()); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags); continue; } @@ -3580,7 +3905,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { N.getValueType(), IdxN, DAG.getConstant(Amt, dl, IdxN.getValueType())); } else { - SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType()); + SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl, + IdxN.getValueType()); IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale); } @@ -3591,6 +3917,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } } + if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds()) + N = DAG.getPtrExtendInReg(N, dl, PtrMemTy); + setValue(&I, N); } @@ -3675,16 +4004,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; - bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout()); + bool isDereferenceable = + isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); - SmallVector<EVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs, MemVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets); + ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3750,12 +4080,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { MMOFlags |= MachineMemOperand::MODereferenceable; MMOFlags |= TLI.getMMOFlags(I); - SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, + SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, MMOFlags, AAInfo, Ranges); + Chains[ChainI] = L.getValue(1); + + if (MemVTs[i] != ValueVTs[i]) + L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]); Values[i] = L; - Chains[ChainI] = L.getValue(1); } if (!ConstantMemory) { @@ -3785,15 +4118,13 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. - unsigned VReg; bool CreatedVReg; - std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I); + unsigned VReg = + SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand()); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. 
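//===----------------------------------------------------------------------===//
// Aside: the index-scaling logic of visitGetElementPtr above, in scalar form
// for 64-bit pointers. A power-of-two element size becomes a shift (the
// ISD::SHL path); anything else multiplies (the ISD::MUL path). Illustrative
// only; assumes every element size is nonzero.

#include <cstdint>

uint64_t scaleIndex(int64_t Index, uint64_t ElementSize) {
  if ((ElementSize & (ElementSize - 1)) == 0) {
    // Power of two: find log2 of the element size and shift.
    unsigned Amt = 0;
    while ((UINT64_C(1) << Amt) != ElementSize)
      ++Amt;
    return static_cast<uint64_t>(Index) << Amt;
  }
  return static_cast<uint64_t>(Index) * ElementSize;
}

uint64_t gepOffset(const int64_t *Indices, const uint64_t *ElemSizes,
                   unsigned NumIndices) {
  uint64_t Offset = 0;
  for (unsigned i = 0; i != NumIndices; ++i)
    Offset += scaleIndex(Indices[i], ElemSizes[i]); // the ISD::ADD step above
  return Offset;
}
//===----------------------------------------------------------------------===//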
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, SDValue(Src.getNode(), Src.getResNo())); DAG.setRoot(CopyNode); - if (CreatedVReg) - FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); } void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { @@ -3826,8 +4157,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT SDValue L = DAG.getCopyFromReg( getRoot(), getCurSDLoc(), - FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first, - ValueVTs[0]); + SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]); setValue(&I, L); } @@ -3854,10 +4184,10 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { } } - SmallVector<EVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs, MemVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), - SrcV->getType(), ValueVTs, &Offsets); + SrcV->getType(), ValueVTs, &MemVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3899,9 +4229,12 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { } SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), Flags); - SDValue St = DAG.getStore( - Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, - MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo); + SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i); + if (MemVTs[i] != ValueVTs[i]) + Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]); + SDValue St = + DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]), + Alignment, MMOFlags, AAInfo); Chains[ChainI] = St; } @@ -4181,19 +4514,34 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); - AtomicOrdering SuccessOrder = I.getSuccessOrdering(); - AtomicOrdering FailureOrder = I.getFailureOrdering(); + AtomicOrdering SuccessOrdering = I.getSuccessOrdering(); + AtomicOrdering FailureOrdering = I.getFailureOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); - SDValue L = DAG.getAtomicCmpSwap( - ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, - getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), - getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), - /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID); + + auto Alignment = DAG.getEVTAlignment(MemVT); + + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + if (I.isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), + Flags, MemVT.getStoreSize(), Alignment, + AAMDNodes(), nullptr, SSID, SuccessOrdering, + FailureOrdering); + + SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, + dl, MemVT, VTs, InChain, + getValue(I.getPointerOperand()), + getValue(I.getCompareOperand()), + getValue(I.getNewValOperand()), MMO); SDValue OutChain = L.getValue(2); @@ -4217,20 +4565,32 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break; case AtomicRMWInst::UMax: NT = 
ISD::ATOMIC_LOAD_UMAX; break; case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; + case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break; + case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break; } - AtomicOrdering Order = I.getOrdering(); + AtomicOrdering Ordering = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); + auto MemVT = getValue(I.getValOperand()).getSimpleValueType(); + auto Alignment = DAG.getEVTAlignment(MemVT); + + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + if (I.isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, + MemVT.getStoreSize(), Alignment, AAMDNodes(), + nullptr, SSID, Ordering); + SDValue L = - DAG.getAtomic(NT, dl, - getValue(I.getValOperand()).getSimpleValueType(), - InChain, - getValue(I.getPointerOperand()), - getValue(I.getValOperand()), - I.getPointerOperand(), - /* Alignment=*/ 0, Order, SSID); + DAG.getAtomic(NT, dl, MemVT, InChain, + getValue(I.getPointerOperand()), getValue(I.getValOperand()), + MMO); SDValue OutChain = L.getValue(1); @@ -4259,27 +4619,39 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType()); if (!TLI.supportsUnalignedAtomics() && - I.getAlignment() < VT.getStoreSize()) + I.getAlignment() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); + auto Flags = MachineMemOperand::MOLoad; + if (I.isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr) + Flags |= MachineMemOperand::MOInvariant; + if (isDereferenceablePointer(I.getPointerOperand(), I.getType(), + DAG.getDataLayout())) + Flags |= MachineMemOperand::MODereferenceable; + + Flags |= TLI.getMMOFlags(I); + MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), - MachineMemOperand::MOVolatile | - MachineMemOperand::MOLoad, - VT.getStoreSize(), + Flags, MemVT.getStoreSize(), I.getAlignment() ? 
I.getAlignment() : - DAG.getEVTAlignment(VT), + DAG.getEVTAlignment(MemVT), AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = - DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, + DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, getValue(I.getPointerOperand()), MMO); SDValue OutChain = L.getValue(1); + if (MemVT != VT) + L = DAG.getPtrExtOrTrunc(L, dl, VT); setValue(&I, L); DAG.setRoot(OutChain); @@ -4288,25 +4660,36 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDLoc dl = getCurSDLoc(); - AtomicOrdering Order = I.getOrdering(); + AtomicOrdering Ordering = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = - TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); + EVT MemVT = + TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); - if (I.getAlignment() < VT.getStoreSize()) + if (I.getAlignment() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); - SDValue OutChain = - DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, - InChain, - getValue(I.getPointerOperand()), - getValue(I.getValueOperand()), - I.getPointerOperand(), I.getAlignment(), - Order, SSID); + auto Flags = MachineMemOperand::MOStore; + if (I.isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + Flags |= TLI.getMMOFlags(I); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, + MemVT.getStoreSize(), I.getAlignment(), AAMDNodes(), + nullptr, SSID, Ordering); + + SDValue Val = getValue(I.getValueOperand()); + if (Val.getValueType() != MemVT) + Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT); + + SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, + getValue(I.getPointerOperand()), Val, MMO); + DAG.setRoot(OutChain); } @@ -4364,10 +4747,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory - Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, - Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, - Info.flags, Info.size); + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + Result = + DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), + Info.align, Info.flags, Info.size, AAInfo); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -4889,7 +5274,7 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function &F = DAG.getMachineFunction().getFunction(); - if (!F.optForSize() || + if (!F.hasOptSize() || // If optimizing for size, don't insert too many multiplies. // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { @@ -4952,6 +5337,71 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (!Arg) return false; + if (!IsDbgDeclare) { + // ArgDbgValues are hoisted to the beginning of the entry block. So we + // should only emit as ArgDbgValue if the dbg.value intrinsic is found in + // the entry block. 
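//===----------------------------------------------------------------------===//
// Aside: the cost test in ExpandPowI above, countPopulation(Val) +
// Log2_32(Val) < 7, counts the multiplies that exponentiation by squaring
// needs: roughly one squaring per bit position after the lowest, plus one
// multiply per set bit of the exponent. A sketch of that expansion for
// doubles (illustrative; not the DAG expansion itself):

#include <cstdint>

double powi(double Base, uint32_t Exp) {
  double Result = 1.0;
  while (Exp) {
    if (Exp & 1)
      Result *= Base; // one multiply per set bit of the exponent
    Exp >>= 1;
    if (Exp)
      Base *= Base; // one squaring per remaining bit position
  }
  return Result;
}
//===----------------------------------------------------------------------===//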
+    bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
+    if (!IsInEntryBlock)
+      return false;
+
+    // ArgDbgValues are hoisted to the beginning of the entry block. So we
+    // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
+    // variable that also is a param.
+    //
+    // Although, if we are at the top of the entry block already, we can still
+    // emit using ArgDbgValue. This might catch some situations when the
+    // dbg.value refers to an argument that isn't used in the entry block, so
+    // any CopyToReg node would be optimized out and the only way to express
+    // this DBG_VALUE is by using the physical reg (or FI) as done in this
+    // method. ArgDbgValues are hoisted to the beginning of the entry block. So
+    // we should only emit as ArgDbgValue if the Variable is an argument to the
+    // current function, and the dbg.value intrinsic is found in the entry
+    // block.
+    bool VariableIsFunctionInputArg = Variable->isParameter() &&
+                                      !DL->getInlinedAt();
+    bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
+    if (!IsInPrologue && !VariableIsFunctionInputArg)
+      return false;
+
+    // Here we assume that a function argument on IR level only can be used to
+    // describe one input parameter on source level. If we for example have
+    // source code like this
+    //
+    //    struct A { long x, y; };
+    //    void foo(struct A a, long b) {
+    //      ...
+    //      b = a.x;
+    //      ...
+    //    }
+    //
+    // and IR like this
+    //
+    //  define void @foo(i32 %a1, i32 %a2, i32 %b)  {
+    //  entry:
+    //    call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
+    //    call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
+    //    call void @llvm.dbg.value(metadata i32 %b, "b",
+    //    ...
+    //    call void @llvm.dbg.value(metadata i32 %a1, "b"
+    //    ...
+    //
+    // then the last dbg.value is describing a parameter "b" using a value that
+    // is an argument. But since we already have used %a1 to describe a
+    // parameter, we should not handle that last dbg.value here (that would
+    // result in an incorrect hoisting of the DBG_VALUE to the function entry).
+    // Notice that we allow one dbg.value per IR level argument, to accommodate
+    // the situation with fragments above.
+    if (VariableIsFunctionInputArg) {
+      unsigned ArgNo = Arg->getArgNo();
+      if (ArgNo >= FuncInfo.DescribedArgs.size())
+        FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
+      else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
+        return false;
+      FuncInfo.DescribedArgs.set(ArgNo);
+    }
+  }
+
   MachineFunction &MF = DAG.getMachineFunction();
   const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
@@ -4976,12 +5426,14 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
     }
   }

-  if (!Op && N.getNode())
+  if (!Op && N.getNode()) {
     // Check if frame index is available.
-    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+    SDValue LCandidate = peekThroughBitcasts(N);
+    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
       if (FrameIndexSDNode *FINode =
           dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
         Op = MachineOperand::CreateFI(FINode->getIndex());
+  }

   if (!Op) {
     // Check if ValueMap has reg number.
@@ -5055,11 +5507,29 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
 #  define setjmp_undefined_for_msvc
 #endif

-/// Lower the call to the specified intrinsic function. If we want to emit this
-/// as a call to a named external function, return the name. Otherwise, lower it
-/// and return null.
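//===----------------------------------------------------------------------===//
// Aside: the FuncInfo.DescribedArgs bookkeeping above, as a standalone
// helper. The first dbg.value naming IR argument N claims it; a later,
// non-prologue dbg.value naming the same argument is refused, so it cannot be
// hoisted to the entry block and mis-describe a second source variable.
// std::vector<bool> stands in for LLVM's BitVector; names are illustrative.

#include <vector>

class DescribedArgs {
  std::vector<bool> Seen;

public:
  // Returns true if the dbg.value may still be emitted as an ArgDbgValue.
  bool tryClaim(unsigned ArgNo, bool IsInPrologue) {
    if (ArgNo >= Seen.size())
      Seen.resize(ArgNo + 1, false);
    else if (!IsInPrologue && Seen[ArgNo])
      return false; // argument already used to describe some parameter
    Seen[ArgNo] = true;
    return true;
  }
};
//===----------------------------------------------------------------------===//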
-const char * -SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { +static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) { + switch (Intrinsic) { + case Intrinsic::smul_fix: + return ISD::SMULFIX; + case Intrinsic::umul_fix: + return ISD::UMULFIX; + default: + llvm_unreachable("Unhandled fixed point intrinsic"); + } +} + +void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I, + const char *FunctionName) { + assert(FunctionName && "FunctionName must not be nullptr"); + SDValue Callee = DAG.getExternalSymbol( + FunctionName, + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); + LowerCallTo(&I, Callee, I.isTailCall()); +} + +/// Lower the call to the specified intrinsic function. +void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, + unsigned Intrinsic) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc sdl = getCurSDLoc(); DebugLoc dl = getCurDebugLoc(); @@ -5069,28 +5539,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { default: // By default, turn this into a target intrinsic node. visitTargetIntrinsic(I, Intrinsic); - return nullptr; - case Intrinsic::vastart: visitVAStart(I); return nullptr; - case Intrinsic::vaend: visitVAEnd(I); return nullptr; - case Intrinsic::vacopy: visitVACopy(I); return nullptr; + return; + case Intrinsic::vastart: visitVAStart(I); return; + case Intrinsic::vaend: visitVAEnd(I); return; + case Intrinsic::vacopy: visitVACopy(I); return; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); - return nullptr; + return; case Intrinsic::addressofreturnaddress: setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()))); - return nullptr; + return; case Intrinsic::sponentry: setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl, TLI.getPointerTy(DAG.getDataLayout()))); - return nullptr; + return; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); - return nullptr; + return; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); SDValue Chain = getRoot(); @@ -5101,7 +5571,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getVTList(VT, MVT::Other), Chain, RegName); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - return nullptr; + return; } case Intrinsic::write_register: { Value *Reg = I.getArgOperand(0); @@ -5111,12 +5581,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, RegName, getValue(RegValue))); - return nullptr; + return; } case Intrinsic::setjmp: - return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; + lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]); + return; case Intrinsic::longjmp: - return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; + lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]); + return; case Intrinsic::memcpy: { const auto &MCI = cast<MemCpyInst>(I); SDValue Op1 = getValue(I.getArgOperand(0)); @@ -5135,7 +5607,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MC); - return nullptr; + return; } case 
Intrinsic::memset: { const auto &MSI = cast<MemSetInst>(I); @@ -5149,7 +5621,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0))); updateDAGForMaybeTailCall(MS); - return nullptr; + return; } case Intrinsic::memmove: { const auto &MMI = cast<MemMoveInst>(I); @@ -5168,7 +5640,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MM); - return nullptr; + return; } case Intrinsic::memcpy_element_unordered_atomic: { const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I); @@ -5186,7 +5658,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MachinePointerInfo(MI.getRawDest()), MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); - return nullptr; + return; } case Intrinsic::memmove_element_unordered_atomic: { auto &MI = cast<AtomicMemMoveInst>(I); @@ -5204,7 +5676,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MachinePointerInfo(MI.getRawDest()), MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); - return nullptr; + return; } case Intrinsic::memset_element_unordered_atomic: { auto &MI = cast<AtomicMemSetInst>(I); @@ -5220,7 +5692,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest())); updateDAGForMaybeTailCall(MC); - return nullptr; + return; } case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: { @@ -5235,7 +5707,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Address || isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - return nullptr; + return; } bool isParameter = Variable->isParameter() || isa<Argument>(Address); @@ -5264,7 +5736,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder); DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter); } - return nullptr; + return; } SDValue &N = NodeMap[Address]; @@ -5286,7 +5758,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. 
EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N); - return nullptr; + return; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, dl, SDNodeOrder); @@ -5300,7 +5772,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } - return nullptr; + return; } case Intrinsic::dbg_label: { const DbgLabelInst &DI = cast<DbgLabelInst>(I); @@ -5310,7 +5782,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDDbgLabel *SDV; SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder); DAG.AddDbgLabel(SDV); - return nullptr; + return; } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); @@ -5321,88 +5793,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { dropDanglingDebugInfo(Variable, Expression); const Value *V = DI.getValue(); if (!V) - return nullptr; - - SDDbgValue *SDV; - if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) || - isa<ConstantPointerNull>(V)) { - SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); - return nullptr; - } - - // Do not use getValue() in here; we don't want to generate code at - // this point if it hasn't been done yet. - SDValue N = NodeMap[V]; - if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map. - N = UnusedArgNodeMap[V]; - if (N.getNode()) { - if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N)) - return nullptr; - SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, N.getNode(), false); - return nullptr; - } - - // PHI nodes have already been selected, so we should know which VReg that - // is assigns to already. - if (isa<PHINode>(V)) { - auto VMI = FuncInfo.ValueMap.find(V); - if (VMI != FuncInfo.ValueMap.end()) { - unsigned Reg = VMI->second; - // The PHI node may be split up into several MI PHI nodes (in - // FunctionLoweringInfo::set). - RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType(), None); - if (RFV.occupiesMultipleRegs()) { - unsigned Offset = 0; - unsigned BitsToDescribe = 0; - if (auto VarSize = Variable->getSizeInBits()) - BitsToDescribe = *VarSize; - if (auto Fragment = Expression->getFragmentInfo()) - BitsToDescribe = Fragment->SizeInBits; - for (auto RegAndSize : RFV.getRegsAndSizes()) { - unsigned RegisterSize = RegAndSize.second; - // Bail out if all bits are described already. - if (Offset >= BitsToDescribe) - break; - unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe) - ? BitsToDescribe - Offset - : RegisterSize; - auto FragmentExpr = DIExpression::createFragmentExpression( - Expression, Offset, FragmentSize); - if (!FragmentExpr) - continue; - SDV = DAG.getVRegDbgValue(Variable, *FragmentExpr, RegAndSize.first, - false, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); - Offset += RegisterSize; - } - } else { - SDV = DAG.getVRegDbgValue(Variable, Expression, Reg, false, dl, - SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); - } - return nullptr; - } - } + return; - // TODO: When we get here we will either drop the dbg.value completely, or - // we try to move it forward by letting it dangle for awhile. So we should - // probably add an extra DbgValue to the DAG here, with a reference to - // "noreg", to indicate that we have lost the debug location for the - // variable. 
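Note on the hunk above: the deleted dbg.value paths (constant values, NodeMap and UnusedArgNodeMap lookups, the argument case) are what this patch folds into the new handleRegAssign-style helper handleDebugValue called just below. A minimal sketch of that helper, reconstructed from the deleted lines only — the helper actually introduced by the patch is more thorough and also covers values split over several registers (the PHI/vreg path removed above):

    // Sketch only: mirrors the logic deleted above; not the full helper.
    bool SelectionDAGBuilder::handleDebugValue(const Value *V,
                                               DILocalVariable *Var,
                                               DIExpression *Expr,
                                               DebugLoc dl, DebugLoc DbgLoc,
                                               unsigned Order) {
      // Constants can be described without generating any code.
      if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
          isa<ConstantPointerNull>(V)) {
        SDDbgValue *SDV = DAG.getConstantDbgValue(Var, Expr, V, dl, Order);
        DAG.AddDbgValue(SDV, nullptr, false);
        return true;
      }

      // Do not call getValue() here; that could emit code. Only reuse a
      // node if the value has been lowered already.
      SDValue N = NodeMap[V];
      if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
        N = UnusedArgNodeMap[V];
      if (N.getNode()) {
        if (EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
          return true;
        SDDbgValue *SDV = getDbgValue(N, Var, Expr, dl, Order);
        DAG.AddDbgValue(SDV, N.getNode(), false);
        return true;
      }

      return false; // Caller records V in DanglingDebugInfoMap.
    }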
+ if (handleDebugValue(V, Variable, Expression, dl, DI.getDebugLoc(), + SDNodeOrder)) + return; - if (!V->use_empty() ) { - // Do not call getValue(V) yet, as we don't want to generate code. - // Remember it for later. - DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder); - return nullptr; - } + // TODO: Dangling debug info will eventually either be resolved or produce + // an Undef DBG_VALUE. However in the resolution case, a gap may appear + // between the original dbg.value location and its resolved DBG_VALUE, which + // we should ideally fill with an extra Undef DBG_VALUE. - LLVM_DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); - LLVM_DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); - return nullptr; + DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder); + return; } case Intrinsic::eh_typeid_for: { @@ -5411,7 +5814,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); - return nullptr; + return; } case Intrinsic::eh_return_i32: @@ -5422,15 +5825,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getControlRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return nullptr; + return; case Intrinsic::eh_unwind_init: DAG.getMachineFunction().setCallsUnwindInit(true); - return nullptr; + return; case Intrinsic::eh_dwarf_cfa: setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); - return nullptr; + return; case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); @@ -5438,7 +5841,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); MMI.setCurrentCallSite(CI->getZExtValue()); - return nullptr; + return; } case Intrinsic::eh_sjlj_functioncontext: { // Get and store the index of the function context. 
@@ -5447,7 +5850,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); int FI = FuncInfo.StaticAllocaMap[FnCtx]; MFI.setFunctionContextIndex(FI); - return nullptr; + return; } case Intrinsic::eh_sjlj_setjmp: { SDValue Ops[2]; @@ -5457,34 +5860,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getVTList(MVT::i32, MVT::Other), Ops); setValue(&I, Op.getValue(0)); DAG.setRoot(Op.getValue(1)); - return nullptr; + return; } case Intrinsic::eh_sjlj_longjmp: DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); - return nullptr; + return; case Intrinsic::eh_sjlj_setup_dispatch: DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, getRoot())); - return nullptr; + return; case Intrinsic::masked_gather: visitMaskedGather(I); - return nullptr; + return; case Intrinsic::masked_load: visitMaskedLoad(I); - return nullptr; + return; case Intrinsic::masked_scatter: visitMaskedScatter(I); - return nullptr; + return; case Intrinsic::masked_store: visitMaskedStore(I); - return nullptr; + return; case Intrinsic::masked_expandload: visitMaskedLoad(I, true /* IsExpanding */); - return nullptr; + return; case Intrinsic::masked_compressstore: visitMaskedStore(I, true /* IsCompressing */); - return nullptr; + return; case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: @@ -5496,7 +5899,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShAmt = getValue(I.getArgOperand(1)); if (isa<ConstantSDNode>(ShAmt)) { visitTargetIntrinsic(I, Intrinsic); - return nullptr; + return; } unsigned NewIntrinsic = 0; EVT ShAmtVT = MVT::v2i32; @@ -5542,31 +5945,31 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getConstant(NewIntrinsic, sdl, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); - return nullptr; + return; } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); - return nullptr; + return; case Intrinsic::log: setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); - return nullptr; + return; case Intrinsic::log2: setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); - return nullptr; + return; case Intrinsic::log10: setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); - return nullptr; + return; case Intrinsic::exp: setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); - return nullptr; + return; case Intrinsic::exp2: setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); - return nullptr; + return; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG, TLI)); - return nullptr; + return; case Intrinsic::sqrt: case Intrinsic::fabs: case Intrinsic::sin: @@ -5597,61 +6000,71 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); - return nullptr; + return; } - case Intrinsic::minnum: { - auto VT = getValue(I.getArgOperand(0)).getValueType(); - unsigned Opc = - I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT) - ? 
ISD::FMINIMUM - : ISD::FMINNUM; - setValue(&I, DAG.getNode(Opc, sdl, VT, + case Intrinsic::lround: + case Intrinsic::llround: + case Intrinsic::lrint: + case Intrinsic::llrint: { + unsigned Opcode; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::lround: Opcode = ISD::LROUND; break; + case Intrinsic::llround: Opcode = ISD::LLROUND; break; + case Intrinsic::lrint: Opcode = ISD::LRINT; break; + case Intrinsic::llrint: Opcode = ISD::LLRINT; break; + } + + EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(Opcode, sdl, RetVT, + getValue(I.getArgOperand(0)))); + return; + } + case Intrinsic::minnum: + setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return nullptr; - } - case Intrinsic::maxnum: { - auto VT = getValue(I.getArgOperand(0)).getValueType(); - unsigned Opc = - I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT) - ? ISD::FMAXIMUM - : ISD::FMAXNUM; - setValue(&I, DAG.getNode(Opc, sdl, VT, + return; + case Intrinsic::maxnum: + setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return nullptr; - } + return; case Intrinsic::minimum: setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return nullptr; + return; case Intrinsic::maximum: setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return nullptr; + return; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return nullptr; + return; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); - return nullptr; + return; case Intrinsic::experimental_constrained_fadd: case Intrinsic::experimental_constrained_fsub: case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: @@ -5671,7 +6084,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); - return nullptr; + return; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -5693,7 +6106,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2))); setValue(&I, Add); } - return nullptr; + return; } case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, @@ -5701,17 +6114,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { 
getValue(I.getArgOperand(0)), DAG.getTargetConstant(0, sdl, MVT::i32)))); - return nullptr; + return; case Intrinsic::convert_from_fp16: setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(DAG.getDataLayout(), I.getType()), DAG.getNode(ISD::BITCAST, sdl, MVT::f16, getValue(I.getArgOperand(0))))); - return nullptr; + return; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); - return nullptr; + return; } case Intrinsic::readcyclecounter: { SDValue Op = getRoot(); @@ -5719,25 +6132,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getVTList(MVT::i64, MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - return nullptr; + return; } case Intrinsic::bitreverse: setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); - return nullptr; + return; case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); - return nullptr; + return; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, sdl, Ty, Arg)); - return nullptr; + return; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); @@ -5745,13 +6158,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, sdl, Ty, Arg)); - return nullptr; + return; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); - return nullptr; + return; } case Intrinsic::fshl: case Intrinsic::fshr: { @@ -5767,7 +6180,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR; if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) { setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); - return nullptr; + return; } // When X == Y, this is rotate. If the data type has a power-of-2 size, we @@ -5777,7 +6190,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR; if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) { setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); - return nullptr; + return; } // Some targets only rotate one way. Try the opposite direction. @@ -5786,7 +6199,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Negate the shift amount because it is safe to ignore the high bits. SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z); setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt)); - return nullptr; + return; } // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW)) @@ -5796,7 +6209,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt); SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? 
NShAmt : ShAmt); setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY)); - return nullptr; + return; } // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) @@ -5816,39 +6229,48 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // For fshr, 0-shift returns the 2nd arg (Y). SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ); setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or)); - return nullptr; + return; } case Intrinsic::sadd_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2)); - return nullptr; + return; } case Intrinsic::uadd_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2)); - return nullptr; + return; } case Intrinsic::ssub_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2)); - return nullptr; + return; } case Intrinsic::usub_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2)); - return nullptr; + return; } - case Intrinsic::smul_fix: { + case Intrinsic::smul_fix: + case Intrinsic::umul_fix: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - setValue(&I, - DAG.getNode(ISD::SMULFIX, sdl, Op1.getValueType(), Op1, Op2, Op3)); - return nullptr; + setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl, + Op1.getValueType(), Op1, Op2, Op3)); + return; + } + case Intrinsic::smul_fix_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, + Op3)); + return; } case Intrinsic::stacksave: { SDValue Op = getRoot(); @@ -5857,26 +6279,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - return nullptr; + return; } case Intrinsic::stackrestore: Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); - return nullptr; + return; case Intrinsic::get_dynamic_area_offset: { SDValue Op = getRoot(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); // Result type for @llvm.get.dynamic.area.offset should match PtrTy for // target. - if (PtrTy != ResTy) + if (PtrTy.getSizeInBits() < ResTy.getSizeInBits()) report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" " intrinsic!"); Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), Op); DAG.setRoot(Op); setValue(&I, Res); - return nullptr; + return; } case Intrinsic::stackguard: { EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); @@ -5896,7 +6318,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = TLI.emitStackGuardXorFP(DAG, Res, sdl); DAG.setRoot(Chain); setValue(&I, Res); - return nullptr; + return; } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. 
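An aside on the fshl/fshr lowering in the hunk above: when X == Y and the bit width is a power of two, the funnel shift is a rotate, and masking the shift amount makes the zero-shift case come out right without the IsZeroShift select that the general funnel case needs. A scalar model for an 8-bit rotl, with a hypothetical rotl8 helper used purely for illustration:

    #include <cstdint>

    // Models (X << (Z % BW)) | (X >> ((0 - Z) % BW)) for BW = 8. The
    // unsigned negate-and-mask is why it is safe to ignore the high bits
    // of Z, and why Z == 0 needs no special case: both amounts mask to 0.
    uint8_t rotl8(uint8_t X, unsigned Z) {
      return (uint8_t)((X << (Z & 7)) | (X >> ((0u - Z) & 7)));
    }

    // rotl8(0b10110011, 3) == 0b10011101, and rotl8(X, 0) == X.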
@@ -5923,7 +6345,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { /* Alignment = */ 0, MachineMemOperand::MOVolatile); setValue(&I, Res); DAG.setRoot(Res); - return nullptr; + return; } case Intrinsic::objectsize: { // If we don't know by now, we're never going to know. @@ -5940,14 +6362,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getConstant(0, sdl, Ty); setValue(&I, Res); - return nullptr; + return; } case Intrinsic::is_constant: // If this wasn't constant-folded away by now, then it's not a // constant. setValue(&I, DAG.getConstant(0, sdl, MVT::i1)); - return nullptr; + return; case Intrinsic::annotation: case Intrinsic::ptr_annotation: @@ -5955,12 +6377,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::strip_invariant_group: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); - return nullptr; + return; case Intrinsic::assume: case Intrinsic::var_annotation: case Intrinsic::sideeffect: // Discard annotate attributes, assumptions, and artificial side-effects. - return nullptr; + return; case Intrinsic::codeview_annotation: { // Emit a label associated with this metadata. @@ -5971,7 +6393,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MF.addCodeViewAnnotation(Label, cast<MDNode>(MD)); Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label); DAG.setRoot(Res); - return nullptr; + return; } case Intrinsic::init_trampoline: { @@ -5988,13 +6410,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); DAG.setRoot(Res); - return nullptr; + return; } case Intrinsic::adjust_trampoline: setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); - return nullptr; + return; case Intrinsic::gcroot: { assert(DAG.getMachineFunction().getFunction().hasGC() && "only valid in functions with gc specified, enforced by Verifier"); @@ -6004,19 +6426,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); - return nullptr; + return; } case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); - return nullptr; + return; case Intrinsic::expect: // Just replace __builtin_expect(exp, c) with EXP. setValue(&I, getValue(I.getArgOperand(0))); - return nullptr; + return; case Intrinsic::debugtrap: case Intrinsic::trap: { @@ -6028,7 +6450,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 
ISD::TRAP : ISD::DEBUGTRAP; DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); - return nullptr; + return; } TargetLowering::ArgListTy Args; @@ -6041,7 +6463,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); - return nullptr; + return; } case Intrinsic::uadd_with_overflow: @@ -6063,9 +6485,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + EVT ResultVT = Op1.getValueType(); + EVT OverflowVT = MVT::i1; + if (ResultVT.isVector()) + OverflowVT = EVT::getVectorVT( + *Context, OverflowVT, ResultVT.getVectorNumElements()); + + SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT); setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); - return nullptr; + return; } case Intrinsic::prefetch: { SDValue Ops[5]; @@ -6088,21 +6516,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { PendingLoads.push_back(Result); Result = getRoot(); DAG.setRoot(Result); - return nullptr; + return; } case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { bool IsStart = (Intrinsic == Intrinsic::lifetime_start); // Stack coloring is not enabled in O0, discard region information. if (TM.getOptLevel() == CodeGenOpt::None) - return nullptr; + return; - SmallVector<Value *, 4> Allocas; - GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL); + const int64_t ObjectSize = + cast<ConstantInt>(I.getArgOperand(0))->getSExtValue(); + Value *const ObjectPtr = I.getArgOperand(1); + SmallVector<const Value *, 4> Allocas; + GetUnderlyingObjects(ObjectPtr, Allocas, *DL); - for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), + for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { - AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); + const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); // Could not find an Alloca. if (!LifetimeObject) @@ -6112,49 +6543,50 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // valid frame index. auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject); if (SI == FuncInfo.StaticAllocaMap.end()) - return nullptr; - - int FI = SI->second; - - SDValue Ops[2]; - Ops[0] = getRoot(); - Ops[1] = - DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true); - unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); + return; - Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); + const int FrameIndex = SI->second; + int64_t Offset; + if (GetPointerBaseWithConstantOffset( + ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject) + Offset = -1; // Cannot determine offset from alloca to lifetime object. + Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize, + Offset); DAG.setRoot(Res); } - return nullptr; + return; } case Intrinsic::invariant_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); - return nullptr; + return; case Intrinsic::invariant_end: // Discard region information. - return nullptr; + return; case Intrinsic::clear_cache: - return TLI.getClearCacheBuiltinName(); + /// FunctionName may be null. 
+ if (const char *FunctionName = TLI.getClearCacheBuiltinName()) + lowerCallToExternalSymbol(I, FunctionName); + return; case Intrinsic::donothing: // ignore - return nullptr; + return; case Intrinsic::experimental_stackmap: visitStackmap(I); - return nullptr; + return; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: visitPatchpoint(&I); - return nullptr; + return; case Intrinsic::experimental_gc_statepoint: LowerStatepoint(ImmutableStatepoint(&I)); - return nullptr; + return; case Intrinsic::experimental_gc_result: visitGCResult(cast<GCResultInst>(I)); - return nullptr; + return; case Intrinsic::experimental_gc_relocate: visitGCRelocate(cast<GCRelocateInst>(I)); - return nullptr; + return; case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); case Intrinsic::instrprof_value_profile: @@ -6182,7 +6614,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { .addFrameIndex(FI); } - return nullptr; + return; } case Intrinsic::localrecover: { @@ -6211,7 +6643,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); setValue(&I, Add); - return nullptr; + return; } case Intrinsic::eh_exceptionpointer: @@ -6226,7 +6658,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (Intrinsic == Intrinsic::eh_exceptioncode) N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); setValue(&I, N); - return nullptr; + return; } case Intrinsic::xray_customevent: { // Here we want to make sure that the intrinsic behaves as if it has a @@ -6234,7 +6666,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // FIXME: Support other platforms later. const auto &Triple = DAG.getTarget().getTargetTriple(); if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) - return nullptr; + return; SDLoc DL = getCurSDLoc(); SmallVector<SDValue, 8> Ops; @@ -6257,7 +6689,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue patchableNode = SDValue(MN, 0); DAG.setRoot(patchableNode); setValue(&I, patchableNode); - return nullptr; + return; } case Intrinsic::xray_typedevent: { // Here we want to make sure that the intrinsic behaves as if it has a @@ -6265,7 +6697,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // FIXME: Support other platforms later. 
const auto &Triple = DAG.getTarget().getTargetTriple(); if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) - return nullptr; + return; SDLoc DL = getCurSDLoc(); SmallVector<SDValue, 8> Ops; @@ -6292,14 +6724,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue patchableNode = SDValue(MN, 0); DAG.setRoot(patchableNode); setValue(&I, patchableNode); - return nullptr; + return; } case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); - return nullptr; + return; - case Intrinsic::experimental_vector_reduce_fadd: - case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: case Intrinsic::experimental_vector_reduce_add: case Intrinsic::experimental_vector_reduce_mul: case Intrinsic::experimental_vector_reduce_and: @@ -6312,11 +6744,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_vector_reduce_fmax: case Intrinsic::experimental_vector_reduce_fmin: visitVectorReduce(I, Intrinsic); - return nullptr; + return; case Intrinsic::icall_branch_funnel: { SmallVector<SDValue, 16> Ops; - Ops.push_back(DAG.getRoot()); Ops.push_back(getValue(I.getArgOperand(0))); int64_t Offset; @@ -6359,20 +6790,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops.push_back(T.Target); } + Ops.push_back(DAG.getRoot()); // Chain SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, getCurSDLoc(), MVT::Other, Ops), 0); DAG.setRoot(N); setValue(&I, N); HasTailCall = true; - return nullptr; + return; } case Intrinsic::wasm_landingpad_index: // Information this intrinsic contained has been transferred to // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely // delete it now. 
- return nullptr; + return; + + case Intrinsic::aarch64_settag: + case Intrinsic::aarch64_settag_zero: { + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); + bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero; + SDValue Val = TSI.EmitTargetCodeForSetTag( + DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)), + ZeroMemory); + DAG.setRoot(Val); + setValue(&I, Val); + return; + } } } @@ -6400,6 +6845,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_fma: Opcode = ISD::STRICT_FMA; break; + case Intrinsic::experimental_constrained_fptrunc: + Opcode = ISD::STRICT_FP_ROUND; + break; + case Intrinsic::experimental_constrained_fpext: + Opcode = ISD::STRICT_FP_EXTEND; + break; case Intrinsic::experimental_constrained_sqrt: Opcode = ISD::STRICT_FSQRT; break; @@ -6463,7 +6914,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( SDVTList VTs = DAG.getVTList(ValueVTs); SDValue Result; - if (FPI.isUnaryOp()) + if (Opcode == ISD::STRICT_FP_ROUND) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + DAG.getTargetConstant(0, sdl, + TLI.getPointerTy(DAG.getDataLayout())) }); + else if (FPI.isUnaryOp()) Result = DAG.getNode(Opcode, sdl, VTs, { Chain, getValue(FPI.getArgOperand(0)) }); else if (FPI.isTernaryOp()) @@ -6476,6 +6932,13 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( { Chain, getValue(FPI.getArgOperand(0)), getValue(FPI.getArgOperand(1)) }); + if (FPI.getExceptionBehavior() != + ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) { + SDNodeFlags Flags; + Flags.setFPExcept(true); + Result->setFlags(Flags); + } + assert(Result.getNode()->getNumValues() == 2); SDValue OutChain = Result.getValue(1); DAG.setRoot(OutChain); @@ -6596,11 +7059,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. - Entry.Node = DAG.getRegister(FuncInfo - .getOrCreateSwiftErrorVRegUseAt( - CS.getInstruction(), FuncInfo.MBB, V) - .first, - EVT(TLI.getPointerTy(DL))); + Entry.Node = DAG.getRegister( + SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V), + EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); @@ -6641,13 +7102,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - unsigned VReg; bool CreatedVReg; - std::tie(VReg, CreatedVReg) = - FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction()); + unsigned VReg = SwiftError.getOrCreateVRegDefAt( + CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); - // We update the virtual register for the actual swifterror argument. - if (CreatedVReg) - FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); DAG.setRoot(CopyNode); } } @@ -6995,10 +7452,6 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - computeUsesVAFloatArgument(I, MMI); - - const char *RenameFn = nullptr; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { // Is this an LLVM intrinsic or a target-specific intrinsic? 
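Two notes on the constrained-FP hunk above, hedged since they restate generic SelectionDAG conventions rather than anything specific to this patch: the trailing constant operand of FP_ROUND/STRICT_FP_ROUND is a 0/1 flag, where 0 conservatively says the truncation may change the value (1 would assert it is exact, as when undoing a prior FP_EXTEND); and the FPExcept node flag marks the node as having FP-exception side effects so later combines do not move or delete it. A condensed view of the node being built:

    // STRICT_FP_ROUND operands: {Chain, Value, TruncFlag}; TruncFlag = 0 is
    // the conservative choice, meaning the rounding may lose information.
    SDValue Result = DAG.getNode(
        ISD::STRICT_FP_ROUND, sdl, VTs,
        {Chain, getValue(FPI.getArgOperand(0)),
         DAG.getTargetConstant(0, sdl,
                               TLI.getPointerTy(DAG.getDataLayout()))});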
@@ -7008,9 +7461,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { IID = II->getIntrinsicID(F); if (IID) { - RenameFn = visitIntrinsicCall(I, IID); - if (!RenameFn) - return; + visitIntrinsicCall(I, IID); + return; } } @@ -7159,20 +7611,14 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } } - SDValue Callee; - if (!RenameFn) - Callee = getValue(I.getCalledValue()); - else - Callee = DAG.getExternalSymbol( - RenameFn, - DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); - // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && "Cannot lower calls with arbitrary operand bundles!"); + SDValue Callee = getValue(I.getCalledValue()); + if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) LowerCallSiteWithDeoptBundle(&I, Callee, nullptr); else @@ -7328,8 +7774,9 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); - Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(MF, SSFI)); + Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(MF, SSFI), + TLI.getMemValueType(DL, Ty)); OpInfo.CallOperand = StackSlot; return Chain; @@ -7353,6 +7800,10 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, SmallVector<unsigned, 4> Regs; const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + // No work to do for memory operations. + if (OpInfo.ConstraintType == TargetLowering::C_Memory) + return; + // If this is a constraint for a single physreg, or a constraint for a // register class, find it. unsigned AssignedReg; @@ -7435,7 +7886,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, for (; NumRegs; --NumRegs, ++I) { assert(I != RC->end() && "Ran out of registers to allocate!"); - auto R = (AssignedReg) ? *I : RegInfo.createVirtualRegister(RC); + Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC); Regs.push_back(R); } @@ -7509,9 +7960,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); - bool hasMemory = false; - - // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore + // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack, + // AsmDialect, MayLoad, MayStore). + bool HasSideEffect = IA->hasSideEffects(); ExtraFlags ExtraInfo(CS); unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. @@ -7527,7 +7978,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Process the call argument. BasicBlocks are labels, currently appearing // only in asm's. 
- if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + const Instruction *I = CS.getInstruction(); + if (isa<CallBrInst>(I) && + (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() - + cast<CallBrInst>(I)->getNumIndirectDests())) { + const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal); + EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true); + OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT); + } else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); @@ -7554,8 +8012,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.ConstraintVT = MVT::Other; } - if (!hasMemory) - hasMemory = OpInfo.hasMemory(TLI); + if (!HasSideEffect) + HasSideEffect = OpInfo.hasMemory(TLI); // Determine if this InlineAsm MayLoad or MayStore based on the constraints. // FIXME: Could we compute this on OpInfo rather than T? @@ -7566,17 +8024,20 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { ExtraInfo.update(T); } - SDValue Chain, Flag; // We won't need to flush pending loads if this asm doesn't touch // memory and is nonvolatile. - if (hasMemory || IA->hasSideEffects()) - Chain = getRoot(); - else - Chain = DAG.getRoot(); + SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot(); + + bool IsCallBr = isa<CallBrInst>(CS.getInstruction()); + if (IsCallBr) { + // If this is a callbr we need to flush pending exports since inlineasm_br + // is a terminator. We need to do this before nodes are glued to + // the inlineasm_br node. + Chain = getControlRoot(); + } - // Second pass over the constraints: compute which constraint option to use - // and assign registers to constraints that want a specific physreg. + // Second pass over the constraints: compute which constraint option to use. for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { // If this is an output operand with a matching input operand, look up the // matching input. If their types mismatch, e.g. one is an integer, the @@ -7612,28 +8073,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.isIndirect = true; } - // If this constraint is for a specific register, allocate it before - // anything else. - SDISelAsmOperandInfo &RefOpInfo = - OpInfo.isMatchingInputConstraint() - ? ConstraintOperands[OpInfo.getMatchedOperand()] - : OpInfo; - if (RefOpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); - } - - // Third pass - Loop over all of the operands, assigning virtual or physregs - // to register class operands. - for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { - SDISelAsmOperandInfo &RefOpInfo = - OpInfo.isMatchingInputConstraint() - ? ConstraintOperands[OpInfo.getMatchedOperand()] - : OpInfo; - - // C_Register operands have already been allocated, Other/Memory don't need - // to be. - if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -7653,21 +8092,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(DAG.getTargetConstant( ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); - // Loop over all of the inputs, copying the operand values into the - // appropriate registers and processing the output regs. 
-  RegsForValue RetValRegs;
-
-  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
-  std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
-
+  // Third pass: Loop over operands to prepare DAG-level operands. As part of
+  // this, assign virtual and physical registers for inputs and outputs.
   for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+    // Assign Registers.
+    SDISelAsmOperandInfo &RefOpInfo =
+        OpInfo.isMatchingInputConstraint()
+            ? ConstraintOperands[OpInfo.getMatchedOperand()]
+            : OpInfo;
+    GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+
     switch (OpInfo.Type) {
     case InlineAsm::isOutput:
-      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
-          OpInfo.ConstraintType != TargetLowering::C_Register) {
-        // Memory output, or 'other' output (e.g. 'X' constraint).
-        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
-
+      if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+          (OpInfo.ConstraintType == TargetLowering::C_Other &&
+           OpInfo.isIndirect)) {
         unsigned ConstraintID =
             TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
         assert(ConstraintID != InlineAsm::Constraint_Unknown &&
@@ -7680,38 +8119,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                                                         MVT::i32));
         AsmNodeOperands.push_back(OpInfo.CallOperand);
         break;
-      }
-
-      // Otherwise, this is a register or register class output.
-
-      // Copy the output from the appropriate register. Find a register that
-      // we can use.
-      if (OpInfo.AssignedRegs.Regs.empty()) {
-        emitInlineAsmError(
-            CS, "couldn't allocate output register for constraint '" +
-                    Twine(OpInfo.ConstraintCode) + "'");
-        return;
-      }
+      } else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
+                  !OpInfo.isIndirect) ||
+                 OpInfo.ConstraintType == TargetLowering::C_Register ||
+                 OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
+        // Otherwise, this outputs to a register (directly for C_Register /
+        // C_RegisterClass, and in a target-defined fashion for C_Other).
+        // Find a register that we can use.
+        if (OpInfo.AssignedRegs.Regs.empty()) {
+          emitInlineAsmError(
+              CS, "couldn't allocate output register for constraint '" +
+                      Twine(OpInfo.ConstraintCode) + "'");
+          return;
+        }

-      // If this is an indirect operand, store through the pointer after the
-      // asm.
-      if (OpInfo.isIndirect) {
-        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
-                                                      OpInfo.CallOperandVal));
-      } else {
-        // This is the result value of the call.
-        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
-        // Concatenate this output onto the outputs list.
-        RetValRegs.append(OpInfo.AssignedRegs);
+        // Add information to the INLINEASM node to know that this register is
+        // set.
+        OpInfo.AssignedRegs.AddInlineAsmOperands(
+            OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
+                                  : InlineAsm::Kind_RegDef,
+            false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
       }
-
-      // Add information to the INLINEASM node to know that this register is
-      // set.
-      OpInfo.AssignedRegs
-          .AddInlineAsmOperands(OpInfo.isEarlyClobber
-                                    ? InlineAsm::Kind_RegDefEarlyClobber
-                                    : InlineAsm::Kind_RegDef,
-                                false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
       break;

     case InlineAsm::isInput: {
@@ -7865,98 +8293,117 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
   if (Flag.getNode()) AsmNodeOperands.push_back(Flag);

-  Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(),
+  unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
+  Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
                       DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
   Flag = Chain.getValue(1);

-  // If this asm returns a register value, copy the result from that register
-  // and set it as the value of the call.
-  if (!RetValRegs.Regs.empty()) {
-    SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
-                                             Chain, &Flag, CS.getInstruction());
-
-    llvm::Type *CSResultType = CS.getType();
-    unsigned numRet;
-    ArrayRef<Type *> ResultTypes;
-    SmallVector<SDValue, 1> ResultValues(1);
-    if (StructType *StructResult = dyn_cast<StructType>(CSResultType)) {
-      numRet = StructResult->getNumElements();
-      assert(Val->getNumOperands() == numRet &&
-             "Mismatch in number of output operands in asm result");
-      ResultTypes = StructResult->elements();
-      ArrayRef<SDUse> ValueUses = Val->ops();
-      ResultValues.resize(numRet);
-      std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(),
-                     [](const SDUse &u) -> SDValue { return u.get(); });
-    } else {
-      numRet = 1;
-      ResultValues[0] = Val;
-      ResultTypes = makeArrayRef(CSResultType);
-    }
-    SmallVector<EVT, 1> ResultVTs(numRet);
-    for (unsigned i = 0; i < numRet; i++) {
-      EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), ResultTypes[i]);
-      SDValue Val = ResultValues[i];
-      assert(ResultTypes[i]->isSized() && "Unexpected unsized type");
-      // If the type of the inline asm call site return value is different but
-      // has same size as the type of the asm output bitcast it. One example
-      // of this is for vectors with different width / number of elements.
-      // This can happen for register classes that can contain multiple
-      // different value types. The preg or vreg allocated may not have the
-      // same VT as was expected.
-      //
-      // This can also happen for a return value that disagrees with the
-      // register class it is put in, eg. a double in a general-purpose
-      // register on a 32-bit machine.
-      if (ResultVT != Val.getValueType() &&
-          ResultVT.getSizeInBits() == Val.getValueSizeInBits())
-        Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, Val);
-      else if (ResultVT != Val.getValueType() && ResultVT.isInteger() &&
-               Val.getValueType().isInteger()) {
-        // If a result value was tied to an input value, the computed result
-        // may have a wider width than the expected result. Extract the
-        // relevant portion.
-        Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, Val);
-      }
+  // Do additional work to generate outputs.

-      assert(ResultVT == Val.getValueType() && "Asm result value mismatch!");
-      ResultVTs[i] = ResultVT;
-      ResultValues[i] = Val;
-    }
+  SmallVector<EVT, 1> ResultVTs;
+  SmallVector<SDValue, 1> ResultValues;
+  SmallVector<SDValue, 8> OutChains;

-    Val = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
-                      DAG.getVTList(ResultVTs), ResultValues);
-    setValue(CS.getInstruction(), Val);
-    // Don't need to use this as a chain in this case.
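For context on IsCallBr and ISD::INLINEASM_BR above: this is the SelectionDAG half of asm goto support, where the inline asm acts as a control-flow terminator and must therefore be glued to the control root rather than the plain root. A minimal C example of the source construct, assuming a compiler with the GNU asm-goto extension; note that outputs are not permitted on asm goto at this point in its evolution:

    // asm goto becomes a callbr terminator in IR, and hence INLINEASM_BR
    // here. Inputs and clobbers are allowed; outputs are not (yet).
    int try_op(int x) {
      // The template is left empty for brevity; a real use would test %0
      // and conditionally branch to the 'failed' label (%l1).
      asm goto("" : /* no outputs */ : "r"(x) : /* no clobbers */ : failed);
      return 0;
    failed:
      return 1;
    }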
-    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
-      return;
-  }
+  llvm::Type *CSResultType = CS.getType();
+  ArrayRef<Type *> ResultTypes;
+  if (StructType *StructResult = dyn_cast<StructType>(CSResultType))
+    ResultTypes = StructResult->elements();
+  else if (!CSResultType->isVoidTy())
+    ResultTypes = makeArrayRef(CSResultType);
+
+  auto CurResultType = ResultTypes.begin();
+  auto handleRegAssign = [&](SDValue V) {
+    assert(CurResultType != ResultTypes.end() && "Unexpected value");
+    assert((*CurResultType)->isSized() && "Unexpected unsized type");
+    EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
+    ++CurResultType;
+    // If the type of the inline asm call site return value is different but
+    // has the same size as the type of the asm output, bitcast it. One
+    // example of this is for vectors with different width / number of
+    // elements. This can happen for register classes that can contain
+    // multiple different value types. The preg or vreg allocated may not
+    // have the same VT as was expected.
+    //
+    // This can also happen for a return value that disagrees with the
+    // register class it is put in, eg. a double in a general-purpose
+    // register on a 32-bit machine.
+    if (ResultVT != V.getValueType() &&
+        ResultVT.getSizeInBits() == V.getValueSizeInBits())
+      V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
+    else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
+             V.getValueType().isInteger()) {
+      // If a result value was tied to an input value, the computed result
+      // may have a wider width than the expected result. Extract the
+      // relevant portion.
+      V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
+    }
+    assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
+    ResultVTs.push_back(ResultVT);
+    ResultValues.push_back(V);
+  };

-  std::vector<std::pair<SDValue, const Value *>> StoresToEmit;
+  // Deal with output operands.
+  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+    if (OpInfo.Type == InlineAsm::isOutput) {
+      SDValue Val;
+      // Skip trivial output operands.
+      if (OpInfo.AssignedRegs.Regs.empty())
+        continue;

-  // Process indirect outputs, first output all of the flagged copies out of
-  // physregs.
-  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
-    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
-    const Value *Ptr = IndirectStoresToEmit[i].second;
-    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
-                                             Chain, &Flag, IA);
-    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+      switch (OpInfo.ConstraintType) {
+      case TargetLowering::C_Register:
+      case TargetLowering::C_RegisterClass:
+        Val = OpInfo.AssignedRegs.getCopyFromRegs(
+            DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
+        break;
+      case TargetLowering::C_Other:
+        Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
+                                              OpInfo, DAG);
+        break;
+      case TargetLowering::C_Memory:
+        break; // Already handled.
+      case TargetLowering::C_Unknown:
+        assert(false && "Unexpected unknown constraint");
+      }
+
+      // Indirect outputs manifest as stores. Record output chains.
+      if (OpInfo.isIndirect) {
+        const Value *Ptr = OpInfo.CallOperandVal;
+        assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
+        SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
+                                     MachinePointerInfo(Ptr));
+        OutChains.push_back(Store);
+      } else {
+        // Generate CopyFromRegs to associated registers.
+        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+        if (Val.getOpcode() == ISD::MERGE_VALUES) {
+          for (const SDValue &V : Val->op_values())
+            handleRegAssign(V);
+        } else
+          handleRegAssign(Val);
+      }
+    }
   }

-  // Emit the non-flagged stores from the physregs.
-  SmallVector<SDValue, 8> OutChains;
-  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
-    SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first,
-                               getValue(StoresToEmit[i].second),
-                               MachinePointerInfo(StoresToEmit[i].second));
-    OutChains.push_back(Val);
+  // Set results.
+  if (!ResultValues.empty()) {
+    assert(CurResultType == ResultTypes.end() &&
+           "Mismatch in number of ResultTypes");
+    assert(ResultValues.size() == ResultTypes.size() &&
+           "Mismatch in number of output operands in asm result");
+
+    SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+                            DAG.getVTList(ResultVTs), ResultValues);
+    setValue(CS.getInstruction(), V);
   }

+  // Collect store chains.
   if (!OutChains.empty())
     Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);

-  DAG.setRoot(Chain);
+  // Only update the root if the inline assembly has a memory effect.
+  if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr)
+    DAG.setRoot(Chain);
 }

 void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
@@ -7989,12 +8436,16 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   const DataLayout &DL = DAG.getDataLayout();
-  SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()),
-                           getCurSDLoc(), getRoot(), getValue(I.getOperand(0)),
-                           DAG.getSrcValue(I.getOperand(0)),
-                           DL.getABITypeAlignment(I.getType()));
-  setValue(&I, V);
+  SDValue V = DAG.getVAArg(
+      TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
+      getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
+      DL.getABITypeAlignment(I.getType()));
   DAG.setRoot(V.getValue(1));
+
+  if (I.getType()->isPointerTy())
+    V = DAG.getPtrExtOrTrunc(
+        V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
+  setValue(&I, V);
 }

 void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
@@ -8021,7 +8472,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
     return Op;

   ConstantRange CR = getConstantRangeFromMetadata(*Range);
-  if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet())
+  if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
     return Op;

   APInt Lo = CR.getUnsignedMin();
@@ -8058,7 +8509,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
 /// convention or require stack pointer adjustment. Only a subset of the
 /// intrinsic's operands need to participate in the calling convention.
 void SelectionDAGBuilder::populateCallLoweringInfo(
-    TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS,
+    TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
     unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
     bool IsPatchPoint) {
   TargetLowering::ArgListTy Args;
@@ -8068,21 +8519,21 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
   // Attributes for args start at offset 1, after the return attribute.
   for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) {
-    const Value *V = CS->getOperand(ArgI);
+    const Value *V = Call->getOperand(ArgI);

     assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");

     TargetLowering::ArgListEntry Entry;
     Entry.Node = getValue(V);
     Entry.Ty = V->getType();
-    Entry.setAttributes(&CS, ArgI);
+    Entry.setAttributes(Call, ArgI);
     Args.push_back(Entry);
   }

   CLI.setDebugLoc(getCurSDLoc())
       .setChain(getRoot())
-      .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args))
-      .setDiscardResult(CS->use_empty())
+      .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
+      .setDiscardResult(Call->use_empty())
       .setIsPatchPoint(IsPatchPoint);
 }

@@ -8093,7 +8544,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
 /// avoid constant materialization and register allocation.
 ///
 /// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
-/// generate addess computation nodes, and so ExpandISelPseudo can convert the
+/// generate address computation nodes, and so FinalizeISel can convert the
 /// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
 /// address materialization and register allocation, but may also be required
 /// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
@@ -8226,8 +8677,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
       IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();

   TargetLowering::CallLoweringInfo CLI(DAG);
-  populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
-                           true);
+  populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()),
+                           NumMetaOpers, NumCallArgs, Callee, ReturnTy, true);
   std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);

   SDNode *CallEnd = Result.second.getNode();
@@ -8351,15 +8802,17 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
     FMF = I.getFastMathFlags();

   switch (Intrinsic) {
-  case Intrinsic::experimental_vector_reduce_fadd:
-    if (FMF.isFast())
-      Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
+  case Intrinsic::experimental_vector_reduce_v2_fadd:
+    if (FMF.allowReassoc())
+      Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
+                        DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2));
     else
       Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
     break;
-  case Intrinsic::experimental_vector_reduce_fmul:
-    if (FMF.isFast())
-      Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
+  case Intrinsic::experimental_vector_reduce_v2_fmul:
+    if (FMF.allowReassoc())
+      Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
+                        DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2));
     else
       Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
     break;
@@ -8433,8 +8886,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {

   if (CLI.IsPostTypeLegalization) {
     // If we are lowering a libcall after legalization, split the return type.
-    SmallVector<EVT, 4> OldRetTys = std::move(RetTys);
-    SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets);
+    SmallVector<EVT, 4> OldRetTys;
+    SmallVector<uint64_t, 4> OldOffsets;
+    RetTys.swap(OldRetTys);
+    Offsets.swap(OldOffsets);
+
     for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
       EVT RetVT = OldRetTys[i];
       uint64_t Offset = OldOffsets[i];
@@ -8489,7 +8945,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     // points into the callers stack frame.
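On the visitVectorReduce change above: the v2 reduction intrinsics take an explicit scalar start value as their first operand. With reassoc the start value can be peeled off and folded in after an order-insensitive vector reduction — exactly the FADD/FMUL wrapping added here — while the strict forms keep the sequential, start-first evaluation. A scalar model of the fadd case, helper names hypothetical:

    #include <vector>

    // reassoc lowering: FADD(Start, VECREDUCE_FADD(Vec)); the vector part
    // may be reduced in any association (e.g. as a tree).
    float reduce_v2_fadd_reassoc(float Start, const std::vector<float> &Vec) {
      float VecSum = 0.0f;
      for (float Elt : Vec)
        VecSum += Elt;
      return Start + VecSum;
    }

    // strict lowering (VECREDUCE_STRICT_FADD): fold Start in first, then
    // accumulate in exact element order.
    float reduce_v2_fadd_strict(float Start, const std::vector<float> &Vec) {
      float Acc = Start;
      for (float Elt : Vec)
        Acc += Elt;
      return Acc;
    }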
CLI.IsTailCall = false; } else { + bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( + CLI.RetTy, CLI.CallConv, CLI.IsVarArg); for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + ISD::ArgFlagsTy Flags; + if (NeedsRegBlock) { + Flags.setInConsecutiveRegs(); + if (I == RetTys.size() - 1) + Flags.setInConsecutiveRegsLast(); + } EVT VT = RetTys[I]; MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), CLI.CallConv, VT); @@ -8497,9 +8961,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { CLI.CallConv, VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; + MyFlags.Flags = Flags; MyFlags.VT = RegisterVT; MyFlags.ArgVT = VT; MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetTy->isPointerTy()) { + MyFlags.Flags.setPointer(); + MyFlags.Flags.setPointerAddrSpace( + cast<PointerType>(CLI.RetTy)->getAddressSpace()); + } if (CLI.RetSExt) MyFlags.Flags.setSExt(); if (CLI.RetZExt) @@ -8550,6 +9020,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // specify the alignment it wants. unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); + if (Args[i].Ty->isPointerTy()) { + Flags.setPointer(); + Flags.setPointerAddrSpace( + cast<PointerType>(Args[i].Ty)->getAddressSpace()); + } if (Args[i].IsZExt) Flags.setZExt(); if (Args[i].IsSExt) @@ -8587,8 +9062,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (Args[i].IsByVal || Args[i].IsInAlloca) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); - // For ByVal, alignment should come from FE. BE will guess if this + + unsigned FrameSize = DL.getTypeAllocSize( + Args[i].ByValType ? Args[i].ByValType : ElementTy); + Flags.setByValSize(FrameSize); + // info is not there but there are cases it cannot get right. unsigned FrameAlign; if (Args[i].Alignment) @@ -8619,8 +9097,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // for now. 
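In the byval hunk above, the frame size now comes from the byval attribute's own type when one is recorded, with the pointee element type kept as a fallback. A hypothetical stand-in for that preference, on plain sizes:

    #include <cstdint>
    #include <optional>

    struct ArgSizes {
      std::optional<uint64_t> ByValTypeSize; // size of the attribute's type
      uint64_t PointeeSize;                  // fallback: size of the pointee
    };

    // Mirrors: DL.getTypeAllocSize(Args[i].ByValType ? ByValType : ElementTy)
    static uint64_t byValFrameSize(const ArgSizes &A) {
      return A.ByValTypeSize ? *A.ByValTypeSize : A.PointeeSize;
    }
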
if (Args[i].IsReturned && !Op.getValueType().isVector() && CanLowerReturn) { - assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && - "unexpected use of 'returned'"); + assert((CLI.RetTy == Args[i].Ty || + (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() && + CLI.RetTy->getPointerAddressSpace() == + Args[i].Ty->getPointerAddressSpace())) && + RetTys.size() == NumValues && "unexpected use of 'returned'"); // Before passing 'returned' to the target lowering code, ensure that // either the register MVT and the actual EVT are the same size or that // the return value and argument are extended in the same way; in these @@ -9023,7 +9504,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned PartBase = 0; Type *FinalType = Arg.getType(); if (Arg.hasAttribute(Attribute::ByVal)) - FinalType = cast<PointerType>(FinalType)->getElementType(); + FinalType = Arg.getParamByValType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( FinalType, F.getCallingConv(), F.isVarArg()); for (unsigned Value = 0, NumValues = ValueVTs.size(); @@ -9038,6 +9519,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned OriginalAlignment = TLI->getABIAlignmentForCallingConv(ArgTy, DL); + if (Arg.getType()->isPointerTy()) { + Flags.setPointer(); + Flags.setPointerAddrSpace( + cast<PointerType>(Arg.getType())->getAddressSpace()); + } if (Arg.hasAttribute(Attribute::ZExt)) Flags.setZExt(); if (Arg.hasAttribute(Attribute::SExt)) @@ -9078,11 +9564,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setByVal(); } if (Flags.isByVal() || Flags.isInAlloca()) { - PointerType *Ty = cast<PointerType>(Arg.getType()); - Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); - // For ByVal, alignment should be passed from FE. BE will guess if - // this info is not there but there are cases it cannot get right. + Type *ElementTy = Arg.getParamByValType(); + + // For ByVal, size and alignment should be passed from FE. BE will + // guess if this info is not there but there are cases it cannot get + // right. + unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType()); + Flags.setByValSize(FrameSize); + unsigned FrameAlign; if (Arg.getParamAlignment()) FrameAlign = Arg.getParamAlignment(); @@ -9263,17 +9752,16 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) - FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, - FuncInfo->SwiftErrorArg, Reg); + SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(), + Reg); } // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. - if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + if (Res.getOpcode() == ISD::CopyFromReg) { // If we can, though, try to skip creating an unnecessary vreg. // FIXME: This isn't very clean... it would be nice to make this more - // general. It's also subtly incompatible with the hacks FastISel - // uses with vregs. + // general. 
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { FuncInfo->ValueMap[&Arg] = Reg; @@ -9354,7 +9842,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { if (const Constant *C = dyn_cast<Constant>(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { - RegOut = FuncInfo.CreateRegs(C->getType()); + RegOut = FuncInfo.CreateRegs(C); CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; @@ -9367,7 +9855,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { assert(isa<AllocaInst>(PHIOp) && FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && "Didn't codegen value into a register!??"); - Reg = FuncInfo.CreateRegs(PHIOp->getType()); + Reg = FuncInfo.CreateRegs(PHIOp); CopyValueToVirtualRegister(PHIOp, Reg); } } @@ -9432,450 +9920,6 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { HasTailCall = true; } -uint64_t -SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters, - unsigned First, unsigned Last) const { - assert(Last >= First); - const APInt &LowCase = Clusters[First].Low->getValue(); - const APInt &HighCase = Clusters[Last].High->getValue(); - assert(LowCase.getBitWidth() == HighCase.getBitWidth()); - - // FIXME: A range of consecutive cases has 100% density, but only requires one - // comparison to lower. We should discriminate against such consecutive ranges - // in jump tables. - - return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1; -} - -uint64_t SelectionDAGBuilder::getJumpTableNumCases( - const SmallVectorImpl<unsigned> &TotalCases, unsigned First, - unsigned Last) const { - assert(Last >= First); - assert(TotalCases[Last] >= TotalCases[First]); - uint64_t NumCases = - TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); - return NumCases; -} - -bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, - unsigned First, unsigned Last, - const SwitchInst *SI, - MachineBasicBlock *DefaultMBB, - CaseCluster &JTCluster) { - assert(First <= Last); - - auto Prob = BranchProbability::getZero(); - unsigned NumCmps = 0; - std::vector<MachineBasicBlock*> Table; - DenseMap<MachineBasicBlock*, BranchProbability> JTProbs; - - // Initialize probabilities in JTProbs. - for (unsigned I = First; I <= Last; ++I) - JTProbs[Clusters[I].MBB] = BranchProbability::getZero(); - - for (unsigned I = First; I <= Last; ++I) { - assert(Clusters[I].Kind == CC_Range); - Prob += Clusters[I].Prob; - const APInt &Low = Clusters[I].Low->getValue(); - const APInt &High = Clusters[I].High->getValue(); - NumCmps += (Low == High) ? 1 : 2; - if (I != First) { - // Fill the gap between this and the previous cluster. - const APInt &PreviousHigh = Clusters[I - 1].High->getValue(); - assert(PreviousHigh.slt(Low)); - uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; - for (uint64_t J = 0; J < Gap; J++) - Table.push_back(DefaultMBB); - } - uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; - for (uint64_t J = 0; J < ClusterSize; ++J) - Table.push_back(Clusters[I].MBB); - JTProbs[Clusters[I].MBB] += Clusters[I].Prob; - } - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned NumDests = JTProbs.size(); - if (TLI.isSuitableForBitTests( - NumDests, NumCmps, Clusters[First].Low->getValue(), - Clusters[Last].High->getValue(), DAG.getDataLayout())) { - // Clusters[First..Last] should be lowered as bit tests instead. 
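The jump-table helpers being deleted here (they move to SwitchLoweringUtils rather than disappearing) compare the number of case values against the span they would cover. A simplified sketch of the two quantities, computed directly from the clusters instead of via the TotalCases prefix sums:

    #include <cstdint>
    #include <vector>

    struct Cluster { uint64_t Low, High; }; // inclusive range of case values

    // Span of a would-be table over the clusters (assumed sorted, non-empty).
    static uint64_t jumpTableRange(const std::vector<Cluster> &C) {
      return C.back().High - C.front().Low + 1;
    }

    // Number of case values actually handled, i.e. the occupied table slots.
    static uint64_t jumpTableNumCases(const std::vector<Cluster> &C) {
      uint64_t N = 0;
      for (const Cluster &X : C)
        N += X.High - X.Low + 1;
      return N;
    }
    // A table is worthwhile when NumCases/Range clears a target-chosen
    // density; as the FIXME above notes, a fully consecutive run is
    // trivially 100% dense yet needs only one comparison to lower.
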
- return false; - } - - // Create the MBB that will load from and jump through the table. - // Note: We create it here, but it's not inserted into the function yet. - MachineFunction *CurMF = FuncInfo.MF; - MachineBasicBlock *JumpTableMBB = - CurMF->CreateMachineBasicBlock(SI->getParent()); - - // Add successors. Note: use table order for determinism. - SmallPtrSet<MachineBasicBlock *, 8> Done; - for (MachineBasicBlock *Succ : Table) { - if (Done.count(Succ)) - continue; - addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]); - Done.insert(Succ); - } - JumpTableMBB->normalizeSuccProbs(); - - unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) - ->createJumpTableIndex(Table); - - // Set up the jump table info. - JumpTable JT(-1U, JTI, JumpTableMBB, nullptr); - JumpTableHeader JTH(Clusters[First].Low->getValue(), - Clusters[Last].High->getValue(), SI->getCondition(), - nullptr, false); - JTCases.emplace_back(std::move(JTH), std::move(JT)); - - JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, - JTCases.size() - 1, Prob); - return true; -} - -void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, - const SwitchInst *SI, - MachineBasicBlock *DefaultMBB) { -#ifndef NDEBUG - // Clusters must be non-empty, sorted, and only contain Range clusters. - assert(!Clusters.empty()); - for (CaseCluster &C : Clusters) - assert(C.Kind == CC_Range); - for (unsigned i = 1, e = Clusters.size(); i < e; ++i) - assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue())); -#endif - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!TLI.areJTsAllowed(SI->getParent()->getParent())) - return; - - const int64_t N = Clusters.size(); - const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries(); - const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; - - if (N < 2 || N < MinJumpTableEntries) - return; - - // TotalCases[i]: Total nbr of cases in Clusters[0..i]. - SmallVector<unsigned, 8> TotalCases(N); - for (unsigned i = 0; i < N; ++i) { - const APInt &Hi = Clusters[i].High->getValue(); - const APInt &Lo = Clusters[i].Low->getValue(); - TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; - if (i != 0) - TotalCases[i] += TotalCases[i - 1]; - } - - // Cheap case: the whole range may be suitable for jump table. - uint64_t Range = getJumpTableRange(Clusters,0, N - 1); - uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1); - assert(NumCases < UINT64_MAX / 100); - assert(Range >= NumCases); - if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) { - CaseCluster JTCluster; - if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { - Clusters[0] = JTCluster; - Clusters.resize(1); - return; - } - } - - // The algorithm below is not suitable for -O0. - if (TM.getOptLevel() == CodeGenOpt::None) - return; - - // Split Clusters into minimum number of dense partitions. The algorithm uses - // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code - // for the Case Statement'" (1994), but builds the MinPartitions array in - // reverse order to make it easier to reconstruct the partitions in ascending - // order. In the choice between two optimal partitionings, it picks the one - // which yields more jump tables. - - // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. - SmallVector<unsigned, 8> MinPartitions(N); - // LastElement[i] is the last element of the partition starting at i. 
- SmallVector<unsigned, 8> LastElement(N); - // PartitionsScore[i] is used to break ties when choosing between two - // partitionings resulting in the same number of partitions. - SmallVector<unsigned, 8> PartitionsScore(N); - // For PartitionsScore, a small number of comparisons is considered as good as - // a jump table and a single comparison is considered better than a jump - // table. - enum PartitionScores : unsigned { - NoTable = 0, - Table = 1, - FewCases = 1, - SingleCase = 2 - }; - - // Base case: There is only one way to partition Clusters[N-1]. - MinPartitions[N - 1] = 1; - LastElement[N - 1] = N - 1; - PartitionsScore[N - 1] = PartitionScores::SingleCase; - - // Note: loop indexes are signed to avoid underflow. - for (int64_t i = N - 2; i >= 0; i--) { - // Find optimal partitioning of Clusters[i..N-1]. - // Baseline: Put Clusters[i] into a partition on its own. - MinPartitions[i] = MinPartitions[i + 1] + 1; - LastElement[i] = i; - PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase; - - // Search for a solution that results in fewer partitions. - for (int64_t j = N - 1; j > i; j--) { - // Try building a partition from Clusters[i..j]. - uint64_t Range = getJumpTableRange(Clusters, i, j); - uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j); - assert(NumCases < UINT64_MAX / 100); - assert(Range >= NumCases); - if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) { - unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); - unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1]; - int64_t NumEntries = j - i + 1; - - if (NumEntries == 1) - Score += PartitionScores::SingleCase; - else if (NumEntries <= SmallNumberOfEntries) - Score += PartitionScores::FewCases; - else if (NumEntries >= MinJumpTableEntries) - Score += PartitionScores::Table; - - // If this leads to fewer partitions, or to the same number of - // partitions with better score, it is a better partitioning. - if (NumPartitions < MinPartitions[i] || - (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) { - MinPartitions[i] = NumPartitions; - LastElement[i] = j; - PartitionsScore[i] = Score; - } - } - } - } - - // Iterate over the partitions, replacing some with jump tables in-place. - unsigned DstIndex = 0; - for (unsigned First = 0, Last; First < N; First = Last + 1) { - Last = LastElement[First]; - assert(Last >= First); - assert(DstIndex <= First); - unsigned NumClusters = Last - First + 1; - - CaseCluster JTCluster; - if (NumClusters >= MinJumpTableEntries && - buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { - Clusters[DstIndex++] = JTCluster; - } else { - for (unsigned I = First; I <= Last; ++I) - std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); - } - } - Clusters.resize(DstIndex); -} - -bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, - unsigned First, unsigned Last, - const SwitchInst *SI, - CaseCluster &BTCluster) { - assert(First <= Last); - if (First == Last) - return false; - - BitVector Dests(FuncInfo.MF->getNumBlockIDs()); - unsigned NumCmps = 0; - for (int64_t I = First; I <= Last; ++I) { - assert(Clusters[I].Kind == CC_Range); - Dests.set(Clusters[I].MBB->getNumber()); - NumCmps += (Clusters[I].Low == Clusters[I].High) ? 
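The partitioning loop deleted above is a right-to-left dynamic program: MinPartitions[i] is the fewest partitions for Clusters[i..N-1], and each candidate partition [i..j] is accepted only if it would make a suitable jump table. A sketch with the tie-breaking PartitionsScore dropped; isTable stands in for TLI.isSuitableForJumpTable:

    #include <algorithm>
    #include <vector>

    // Requires N >= 1. isTable(i, j) decides whether Clusters[i..j] could
    // be lowered as one jump table.
    template <typename Pred>
    static std::vector<unsigned> minPartitions(int N, Pred isTable) {
      std::vector<unsigned> Min(N);
      Min[N - 1] = 1;                 // base case: last cluster on its own
      for (int i = N - 2; i >= 0; --i) {
        Min[i] = Min[i + 1] + 1;      // baseline: Clusters[i] alone
        for (int j = N - 1; j > i; --j)
          if (isTable(i, j))          // candidate partition Clusters[i..j]
            Min[i] = std::min(Min[i], 1 + (j == N - 1 ? 0u : Min[j + 1]));
      }
      return Min;
    }
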
1 : 2; - } - unsigned NumDests = Dests.count(); - - APInt Low = Clusters[First].Low->getValue(); - APInt High = Clusters[Last].High->getValue(); - assert(Low.slt(High)); - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL)) - return false; - - APInt LowBound; - APInt CmpRange; - - const int BitWidth = TLI.getPointerTy(DL).getSizeInBits(); - assert(TLI.rangeFitsInWord(Low, High, DL) && - "Case range must fit in bit mask!"); - - // Check if the clusters cover a contiguous range such that no value in the - // range will jump to the default statement. - bool ContiguousRange = true; - for (int64_t I = First + 1; I <= Last; ++I) { - if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { - ContiguousRange = false; - break; - } - } - - if (Low.isStrictlyPositive() && High.slt(BitWidth)) { - // Optimize the case where all the case values fit in a word without having - // to subtract minValue. In this case, we can optimize away the subtraction. - LowBound = APInt::getNullValue(Low.getBitWidth()); - CmpRange = High; - ContiguousRange = false; - } else { - LowBound = Low; - CmpRange = High - Low; - } - - CaseBitsVector CBV; - auto TotalProb = BranchProbability::getZero(); - for (unsigned i = First; i <= Last; ++i) { - // Find the CaseBits for this destination. - unsigned j; - for (j = 0; j < CBV.size(); ++j) - if (CBV[j].BB == Clusters[i].MBB) - break; - if (j == CBV.size()) - CBV.push_back( - CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero())); - CaseBits *CB = &CBV[j]; - - // Update Mask, Bits and ExtraProb. - uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); - uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); - assert(Hi >= Lo && Hi < 64 && "Invalid bit case!"); - CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo; - CB->Bits += Hi - Lo + 1; - CB->ExtraProb += Clusters[i].Prob; - TotalProb += Clusters[i].Prob; - } - - BitTestInfo BTI; - llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) { - // Sort by probability first, number of bits second, bit mask third. - if (a.ExtraProb != b.ExtraProb) - return a.ExtraProb > b.ExtraProb; - if (a.Bits != b.Bits) - return a.Bits > b.Bits; - return a.Mask < b.Mask; - }); - - for (auto &CB : CBV) { - MachineBasicBlock *BitTestBB = - FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); - BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb)); - } - BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), - SI->getCondition(), -1U, MVT::Other, false, - ContiguousRange, nullptr, nullptr, std::move(BTI), - TotalProb); - - BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, - BitTestCases.size() - 1, TotalProb); - return true; -} - -void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, - const SwitchInst *SI) { -// Partition Clusters into as few subsets as possible, where each subset has a -// range that fits in a machine word and has <= 3 unique destinations. - -#ifndef NDEBUG - // Clusters must be sorted and contain Range or JumpTable clusters. 
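The CaseBits construction above packs each destination's case values into one machine word, with bit k set when LowBound + k jumps there. The mask expression is worth unpacking in isolation:

    #include <cassert>
    #include <cstdint>

    // One destination's mask: bits Lo..Hi set, where Lo and Hi are case
    // values taken relative to the low bound of the word-sized span.
    static uint64_t maskForRange(uint64_t Lo, uint64_t Hi) {
      assert(Hi >= Lo && Hi < 64 && "range must fit in one machine word");
      return (~0ULL >> (63 - (Hi - Lo))) << Lo; // (Hi-Lo+1) ones, at bit Lo
    }
    // The header block then branches with a single test per destination:
    //   if ((1ULL << (X - LowBound)) & Mask) goto DestBB;
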
- assert(!Clusters.empty()); - assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable); - for (const CaseCluster &C : Clusters) - assert(C.Kind == CC_Range || C.Kind == CC_JumpTable); - for (unsigned i = 1; i < Clusters.size(); ++i) - assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); -#endif - - // The algorithm below is not suitable for -O0. - if (TM.getOptLevel() == CodeGenOpt::None) - return; - - // If target does not have legal shift left, do not emit bit tests at all. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const DataLayout &DL = DAG.getDataLayout(); - - EVT PTy = TLI.getPointerTy(DL); - if (!TLI.isOperationLegal(ISD::SHL, PTy)) - return; - - int BitWidth = PTy.getSizeInBits(); - const int64_t N = Clusters.size(); - - // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. - SmallVector<unsigned, 8> MinPartitions(N); - // LastElement[i] is the last element of the partition starting at i. - SmallVector<unsigned, 8> LastElement(N); - - // FIXME: This might not be the best algorithm for finding bit test clusters. - - // Base case: There is only one way to partition Clusters[N-1]. - MinPartitions[N - 1] = 1; - LastElement[N - 1] = N - 1; - - // Note: loop indexes are signed to avoid underflow. - for (int64_t i = N - 2; i >= 0; --i) { - // Find optimal partitioning of Clusters[i..N-1]. - // Baseline: Put Clusters[i] into a partition on its own. - MinPartitions[i] = MinPartitions[i + 1] + 1; - LastElement[i] = i; - - // Search for a solution that results in fewer partitions. - // Note: the search is limited by BitWidth, reducing time complexity. - for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) { - // Try building a partition from Clusters[i..j]. - - // Check the range. - if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(), - Clusters[j].High->getValue(), DL)) - continue; - - // Check nbr of destinations and cluster types. - // FIXME: This works, but doesn't seem very efficient. - bool RangesOnly = true; - BitVector Dests(FuncInfo.MF->getNumBlockIDs()); - for (int64_t k = i; k <= j; k++) { - if (Clusters[k].Kind != CC_Range) { - RangesOnly = false; - break; - } - Dests.set(Clusters[k].MBB->getNumber()); - } - if (!RangesOnly || Dests.count() > 3) - break; - - // Check if it's a better partition. - unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); - if (NumPartitions < MinPartitions[i]) { - // Found a better partition. - MinPartitions[i] = NumPartitions; - LastElement[i] = j; - } - } - } - - // Iterate over the partitions, replacing with bit-test clusters in-place. 
- unsigned DstIndex = 0; - for (unsigned First = 0, Last; First < N; First = Last + 1) { - Last = LastElement[First]; - assert(First <= Last); - assert(DstIndex <= First); - - CaseCluster BitTestCluster; - if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) { - Clusters[DstIndex++] = BitTestCluster; - } else { - size_t NumClusters = Last - First + 1; - std::memmove(&Clusters[DstIndex], &Clusters[First], - sizeof(Clusters[0]) * NumClusters); - DstIndex += NumClusters; - } - } - Clusters.resize(DstIndex); -} - void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB) { @@ -9977,10 +10021,13 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, MachineBasicBlock *CurMBB = W.MBB; for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { + bool FallthroughUnreachable = false; MachineBasicBlock *Fallthrough; if (I == W.LastCluster) { // For the last cluster, fall through to the default destination. Fallthrough = DefaultMBB; + FallthroughUnreachable = isa<UnreachableInst>( + DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg()); } else { Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock()); CurMF->insert(BBI, Fallthrough); @@ -9992,8 +10039,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, switch (I->Kind) { case CC_JumpTable: { // FIXME: Optimize away range check based on pivot comparisons. - JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first; - JumpTable *JT = &JTCases[I->JTCasesIndex].second; + JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first; + SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second; // The jump block hasn't been inserted yet; insert it here. MachineBasicBlock *JumpMBB = JT->MBB; @@ -10017,7 +10064,13 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } - addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); + if (FallthroughUnreachable) { + // Skip the range check if the fallthrough block is unreachable. + JTH->OmitRangeCheck = true; + } + + if (!JTH->OmitRangeCheck) + addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); CurMBB->normalizeSuccProbs(); @@ -10034,8 +10087,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, break; } case CC_BitTests: { + // FIXME: If Fallthrough is unreachable, skip the range check. + // FIXME: Optimize away range check based on pivot comparisons. - BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex]; + BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; // The bit test blocks haven't been inserted yet; insert them here. for (BitTestCase &BTC : BTB->Cases) @@ -10078,6 +10133,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, RHS = I->High; } + // If Fallthrough is unreachable, fold away the comparison. + if (FallthroughUnreachable) + CC = ISD::SETTRUE; + // The false probability is the sum of all unhandled cases. 
CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, getCurSDLoc(), I->Prob, UnhandledProbs); @@ -10085,7 +10144,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, if (CurMBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); else - SwitchCases.push_back(CB); + SL->SwitchCases.push_back(CB); break; } @@ -10236,7 +10295,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); else - SwitchCases.push_back(CB); + SL->SwitchCases.push_back(CB); } // Scale CaseProb after peeling a case with the probablity of PeeledCaseProb @@ -10265,7 +10324,7 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( // Don't perform if there is only one cluster or optimizing for size. if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 || TM.getOptLevel() == CodeGenOpt::None || - SwitchMBB->getParent()->getFunction().optForMinSize()) + SwitchMBB->getParent()->getFunction().hasMinSize()) return SwitchMBB; BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100); @@ -10331,38 +10390,6 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // if there are many clusters. sortAndRangeify(Clusters); - if (TM.getOptLevel() != CodeGenOpt::None) { - // Replace an unreachable default with the most popular destination. - // FIXME: Exploit unreachable default more aggressively. - bool UnreachableDefault = - isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()); - if (UnreachableDefault && !Clusters.empty()) { - DenseMap<const BasicBlock *, unsigned> Popularity; - unsigned MaxPop = 0; - const BasicBlock *MaxBB = nullptr; - for (auto I : SI.cases()) { - const BasicBlock *BB = I.getCaseSuccessor(); - if (++Popularity[BB] > MaxPop) { - MaxPop = Popularity[BB]; - MaxBB = BB; - } - } - // Set new default. - assert(MaxPop > 0 && MaxBB); - DefaultMBB = FuncInfo.MBBMap[MaxBB]; - - // Remove cases that were pointing to the destination that is now the - // default. - CaseClusterVector New; - New.reserve(Clusters.size()); - for (CaseCluster &CC : Clusters) { - if (CC.MBB != DefaultMBB) - New.push_back(CC); - } - Clusters = std::move(New); - } - } - // The branch probablity of the peeled case. BranchProbability PeeledCaseProb = BranchProbability::getZero(); MachineBasicBlock *PeeledSwitchMBB = @@ -10380,8 +10407,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { return; } - findJumpTables(Clusters, &SI, DefaultMBB); - findBitTestClusters(Clusters, &SI); + SL->findJumpTables(Clusters, &SI, DefaultMBB); + SL->findBitTestClusters(Clusters, &SI); LLVM_DEBUG({ dbgs() << "Case clusters: "; @@ -10420,7 +10447,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None && - !DefaultMBB->getParent()->getFunction().optForMinSize()) { + !DefaultMBB->getParent()->getFunction().hasMinSize()) { // For optimized builds, lower large range as a balanced binary tree. 
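Two related changes meet here: visitSwitch drops the old heuristic of replacing an unreachable default with the most popular destination, while lowerWorkItem now exploits unreachability directly, omitting the jump-table range check (OmitRangeCheck) or folding the final comparison to SETTRUE. Illustrative C++ showing the source pattern that enables this, relying on Clang/GCC's __builtin_unreachable:

    // With the default provably never taken, the jump-table header may skip
    // the "X u<= 3" bounds check and index the table directly.
    int dispatch(int X) {
      switch (X) {
      case 0: return 10;
      case 1: return 11;
      case 2: return 12;
      case 3: return 13;
      default: __builtin_unreachable(); // caller guarantees X is in [0, 3]
      }
    }
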
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); continue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 5f9cdb69daf7..0072e33f23b7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -1,9 +1,8 @@ //===- SelectionDAGBuilder.h - Selection-DAG building -----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,11 +17,13 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallSite.h" @@ -47,6 +48,7 @@ class AtomicRMWInst; class BasicBlock; class BranchInst; class CallInst; +class CallBrInst; class CatchPadInst; class CatchReturnInst; class CatchSwitchInst; @@ -76,6 +78,7 @@ class ResumeInst; class ReturnInst; class SDDbgValue; class StoreInst; +class SwiftErrorValueTracking; class SwitchInst; class TargetLibraryInfo; class TargetMachine; @@ -91,16 +94,16 @@ class Value; /// implementation that is parameterized by a TargetLowering object. /// class SelectionDAGBuilder { - /// CurInst - The current instruction being visited + /// The current instruction being visited. const Instruction *CurInst = nullptr; DenseMap<const Value*, SDValue> NodeMap; - /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used + /// Maps argument value for unused arguments. This is used /// to preserve debug information for incoming arguments. DenseMap<const Value*, SDValue> UnusedArgNodeMap; - /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap. + /// Helper type for DanglingDebugInfoMap. class DanglingDebugInfo { const DbgValueInst* DI = nullptr; DebugLoc dl; @@ -116,18 +119,17 @@ class SelectionDAGBuilder { unsigned getSDNodeOrder() { return SDNodeOrder; } }; - /// DanglingDebugInfoVector - Helper type for DanglingDebugInfoMap. + /// Helper type for DanglingDebugInfoMap. typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector; - /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not - /// yet seen the referent. We defer handling these until we do see it. - DenseMap<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap; + /// Keeps track of dbg_values for which we have not yet seen the referent. + /// We defer handling these until we do see it. + MapVector<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap; public: - /// PendingLoads - Loads are not emitted to the program immediately. We bunch - /// them up and then emit token factor nodes when possible. This allows us to - /// get simple disambiguation between loads without worrying about alias - /// analysis. + /// Loads are not emitted to the program immediately. 
We bunch them up and + /// then emit token factor nodes when possible. This allows us to get simple + /// disambiguation between loads without worrying about alias analysis. SmallVector<SDValue, 8> PendingLoads; /// State used while lowering a statepoint sequence (gc_statepoint, @@ -135,247 +137,37 @@ public: StatepointLoweringState StatepointLowering; private: - /// PendingExports - CopyToReg nodes that copy values to virtual registers - /// for export to other blocks need to be emitted before any terminator - /// instruction, but they have no other ordering requirements. We bunch them - /// up and the emit a single tokenfactor for them just before terminator - /// instructions. + /// CopyToReg nodes that copy values to virtual registers for export to other + /// blocks need to be emitted before any terminator instruction, but they have + /// no other ordering requirements. We bunch them up and the emit a single + /// tokenfactor for them just before terminator instructions. SmallVector<SDValue, 8> PendingExports; - /// SDNodeOrder - A unique monotonically increasing number used to order the - /// SDNodes we create. + /// A unique monotonically increasing number used to order the SDNodes we + /// create. unsigned SDNodeOrder; - enum CaseClusterKind { - /// A cluster of adjacent case labels with the same destination, or just one - /// case. - CC_Range, - /// A cluster of cases suitable for jump table lowering. - CC_JumpTable, - /// A cluster of cases suitable for bit test lowering. - CC_BitTests - }; - - /// A cluster of case labels. - struct CaseCluster { - CaseClusterKind Kind; - const ConstantInt *Low, *High; - union { - MachineBasicBlock *MBB; - unsigned JTCasesIndex; - unsigned BTCasesIndex; - }; - BranchProbability Prob; - - static CaseCluster range(const ConstantInt *Low, const ConstantInt *High, - MachineBasicBlock *MBB, BranchProbability Prob) { - CaseCluster C; - C.Kind = CC_Range; - C.Low = Low; - C.High = High; - C.MBB = MBB; - C.Prob = Prob; - return C; - } - - static CaseCluster jumpTable(const ConstantInt *Low, - const ConstantInt *High, unsigned JTCasesIndex, - BranchProbability Prob) { - CaseCluster C; - C.Kind = CC_JumpTable; - C.Low = Low; - C.High = High; - C.JTCasesIndex = JTCasesIndex; - C.Prob = Prob; - return C; - } - - static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High, - unsigned BTCasesIndex, BranchProbability Prob) { - CaseCluster C; - C.Kind = CC_BitTests; - C.Low = Low; - C.High = High; - C.BTCasesIndex = BTCasesIndex; - C.Prob = Prob; - return C; - } - }; - - using CaseClusterVector = std::vector<CaseCluster>; - using CaseClusterIt = CaseClusterVector::iterator; - - struct CaseBits { - uint64_t Mask = 0; - MachineBasicBlock* BB = nullptr; - unsigned Bits = 0; - BranchProbability ExtraProb; - - CaseBits() = default; - CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, - BranchProbability Prob): - Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) {} - }; - - using CaseBitsVector = std::vector<CaseBits>; - - /// Sort Clusters and merge adjacent cases. - void sortAndRangeify(CaseClusterVector &Clusters); - - /// CaseBlock - This structure is used to communicate between - /// SelectionDAGBuilder and SDISel for the code generation of additional basic - /// blocks needed by multi-case switch statements. - struct CaseBlock { - // CC - the condition code to use for the case block's setcc node - ISD::CondCode CC; - - // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit. - // Emit by default LHS op RHS. 
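The CaseCluster being removed above (it now lives as SwitchCG::CaseCluster in SwitchLoweringUtils.h) is a small tagged union: Kind selects which union member is live, and the static factories keep construction well-formed. The same pattern in miniature, with hypothetical member names:

    #include <cstdint>

    struct MiniCluster {
      enum Kind { Range, JumpTable, BitTests } K;
      uint64_t Low, High;  // bounds on the case values, valid for every kind
      union {
        int DestBB;        // Range: the destination block
        unsigned JTIndex;  // JumpTable: index into the JTCases vector
        unsigned BTIndex;  // BitTests: index into the BitTestCases vector
      };

      static MiniCluster range(uint64_t L, uint64_t H, int BB) {
        MiniCluster C{Range, L, H}; // union member chosen by the factory
        C.DestBB = BB;
        return C;
      }
    };
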
MHS is used for range comparisons: - // If MHS is not null: (LHS <= MHS) and (MHS <= RHS). - const Value *CmpLHS, *CmpMHS, *CmpRHS; - - // TrueBB/FalseBB - the block to branch to if the setcc is true/false. - MachineBasicBlock *TrueBB, *FalseBB; - - // ThisBB - the block into which to emit the code for the setcc and branches - MachineBasicBlock *ThisBB; - - /// The debug location of the instruction this CaseBlock was - /// produced from. - SDLoc DL; - - // TrueProb/FalseProb - branch weights. - BranchProbability TrueProb, FalseProb; - - CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, - const Value *cmpmiddle, MachineBasicBlock *truebb, - MachineBasicBlock *falsebb, MachineBasicBlock *me, - SDLoc dl, - BranchProbability trueprob = BranchProbability::getUnknown(), - BranchProbability falseprob = BranchProbability::getUnknown()) - : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), - TrueBB(truebb), FalseBB(falsebb), ThisBB(me), DL(dl), - TrueProb(trueprob), FalseProb(falseprob) {} - }; - - struct JumpTable { - /// Reg - the virtual register containing the index of the jump table entry - //. to jump to. - unsigned Reg; - /// JTI - the JumpTableIndex for this jump table in the function. - unsigned JTI; - /// MBB - the MBB into which to emit the code for the indirect jump. - MachineBasicBlock *MBB; - /// Default - the MBB of the default bb, which is a successor of the range - /// check MBB. This is when updating PHI nodes in successors. - MachineBasicBlock *Default; - - JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, - MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {} - }; - struct JumpTableHeader { - APInt First; - APInt Last; - const Value *SValue; - MachineBasicBlock *HeaderBB; - bool Emitted; - - JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H, - bool E = false) - : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H), - Emitted(E) {} - }; - using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>; - - struct BitTestCase { - uint64_t Mask; - MachineBasicBlock *ThisBB; - MachineBasicBlock *TargetBB; - BranchProbability ExtraProb; - - BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr, - BranchProbability Prob): - Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) {} - }; - - using BitTestInfo = SmallVector<BitTestCase, 3>; - - struct BitTestBlock { - APInt First; - APInt Range; - const Value *SValue; - unsigned Reg; - MVT RegVT; - bool Emitted; - bool ContiguousRange; - MachineBasicBlock *Parent; - MachineBasicBlock *Default; - BitTestInfo Cases; - BranchProbability Prob; - BranchProbability DefaultProb; - - BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, - bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, - BitTestInfo C, BranchProbability Pr) - : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg), - RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D), - Cases(std::move(C)), Prob(Pr) {} - }; - - /// Return the range of value in [First..Last]. - uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First, - unsigned Last) const; - - /// Return the number of cases in [First..Last]. - uint64_t getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases, - unsigned First, unsigned Last) const; - - /// Build a jump table cluster from Clusters[First..Last]. Returns false if it - /// decides it's not a good idea. 
- bool buildJumpTable(const CaseClusterVector &Clusters, unsigned First, - unsigned Last, const SwitchInst *SI, - MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster); - - /// Find clusters of cases suitable for jump table lowering. - void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, - MachineBasicBlock *DefaultMBB); - - /// Build a bit test cluster from Clusters[First..Last]. Returns false if it - /// decides it's not a good idea. - bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, - const SwitchInst *SI, CaseCluster &BTCluster); - - /// Find clusters of cases suitable for bit test lowering. - void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI); - - struct SwitchWorkListItem { - MachineBasicBlock *MBB; - CaseClusterIt FirstCluster; - CaseClusterIt LastCluster; - const ConstantInt *GE; - const ConstantInt *LT; - BranchProbability DefaultProb; - }; - using SwitchWorkList = SmallVector<SwitchWorkListItem, 4>; - /// Determine the rank by weight of CC in [First,Last]. If CC has more weight /// than each cluster in the range, its rank is 0. - static unsigned caseClusterRank(const CaseCluster &CC, CaseClusterIt First, - CaseClusterIt Last); + unsigned caseClusterRank(const SwitchCG::CaseCluster &CC, + SwitchCG::CaseClusterIt First, + SwitchCG::CaseClusterIt Last); /// Emit comparison and split W into two subtrees. - void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, - Value *Cond, MachineBasicBlock *SwitchMBB); + void splitWorkItem(SwitchCG::SwitchWorkList &WorkList, + const SwitchCG::SwitchWorkListItem &W, Value *Cond, + MachineBasicBlock *SwitchMBB); /// Lower W. - void lowerWorkItem(SwitchWorkListItem W, Value *Cond, + void lowerWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond, MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB); /// Peel the top probability case if it exceeds the threshold - MachineBasicBlock *peelDominantCaseCluster(const SwitchInst &SI, - CaseClusterVector &Clusters, - BranchProbability &PeeledCaseProb); + MachineBasicBlock * + peelDominantCaseCluster(const SwitchInst &SI, + SwitchCG::CaseClusterVector &Clusters, + BranchProbability &PeeledCaseProb); /// A class which encapsulates all of the information needed to generate a /// stack protector check and signals to isel via its state being initialized @@ -588,17 +380,22 @@ public: AliasAnalysis *AA = nullptr; const TargetLibraryInfo *LibInfo; - /// SwitchCases - Vector of CaseBlock structures used to communicate - /// SwitchInst code generation information. - std::vector<CaseBlock> SwitchCases; + class SDAGSwitchLowering : public SwitchCG::SwitchLowering { + public: + SDAGSwitchLowering(SelectionDAGBuilder *sdb, FunctionLoweringInfo &funcinfo) + : SwitchCG::SwitchLowering(funcinfo), SDB(sdb) {} + + virtual void addSuccessorWithProb( + MachineBasicBlock *Src, MachineBasicBlock *Dst, + BranchProbability Prob = BranchProbability::getUnknown()) override { + SDB->addSuccessorWithProb(Src, Dst, Prob); + } - /// JTCases - Vector of JumpTable structures used to communicate - /// SwitchInst code generation information. - std::vector<JumpTableBlock> JTCases; + private: + SelectionDAGBuilder *SDB; + }; - /// BitTestCases - Vector of BitTestBlock structures used to communicate - /// SwitchInst code generation information. 
- std::vector<BitTestBlock> BitTestCases; + std::unique_ptr<SDAGSwitchLowering> SL; /// A StackProtectorDescriptor structure used to communicate stack protector /// information in between SelectBasicBlock and FinishBasicBlock. @@ -608,27 +405,29 @@ public: // PHI nodes. DenseMap<const Constant *, unsigned> ConstantsOut; - /// FuncInfo - Information about the function as a whole. - /// + /// Information about the function as a whole. FunctionLoweringInfo &FuncInfo; - /// GFI - Garbage collection metadata for the function. + /// Information about the swifterror values used throughout the function. + SwiftErrorValueTracking &SwiftError; + + /// Garbage collection metadata for the function. GCFunctionInfo *GFI; - /// LPadToCallSiteMap - Map a landing pad to the call site indexes. + /// Map a landing pad to the call site indexes. DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap; - /// HasTailCall - This is set to true if a call in the current - /// block has been translated as a tail call. In this case, - /// no subsequent DAG nodes should be created. + /// This is set to true if a call in the current block has been translated as + /// a tail call. In this case, no subsequent DAG nodes should be created. bool HasTailCall = false; LLVMContext *Context; SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, - CodeGenOpt::Level ol) - : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), - FuncInfo(funcinfo) {} + SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol) + : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), + SL(make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo), + SwiftError(swifterror) {} void init(GCFunctionInfo *gfi, AliasAnalysis *AA, const TargetLibraryInfo *li); @@ -670,20 +469,34 @@ public: void visit(unsigned Opcode, const User &I); - /// getCopyFromRegs - If there was virtual register allocated for the value V - /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. + /// If there was virtual register allocated for the value V emit CopyFromReg + /// of the specified type Ty. Return empty SDValue() otherwise. SDValue getCopyFromRegs(const Value *V, Type *Ty); /// If we have dangling debug info that describes \p Variable, or an /// overlapping part of variable considering the \p Expr, then this method - /// weill drop that debug info as it isn't valid any longer. + /// will drop that debug info as it isn't valid any longer. void dropDanglingDebugInfo(const DILocalVariable *Variable, const DIExpression *Expr); - // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, - // generate the debug data structures now that we've seen its definition. + /// If we saw an earlier dbg_value referring to V, generate the debug data + /// structures now that we've seen its definition. void resolveDanglingDebugInfo(const Value *V, SDValue Val); + /// For the given dangling debuginfo record, perform last-ditch efforts to + /// resolve the debuginfo to something that is represented in this DAG. If + /// this cannot be done, produce an Undef debug value record. + void salvageUnresolvedDbgValue(DanglingDebugInfo &DDI); + + /// For a given Value, attempt to create and record a SDDbgValue in the + /// SelectionDAG. + bool handleDebugValue(const Value *V, DILocalVariable *Var, + DIExpression *Expr, DebugLoc CurDL, + DebugLoc InstDL, unsigned Order); + + /// Evict any dangling debug information, attempting to salvage it first. 
+ void resolveOrClearDbgInfo(); + SDValue getValue(const Value *V); bool findValue(const Value *V) const; @@ -720,7 +533,7 @@ public: MachineBasicBlock *SwitchBB, BranchProbability TProb, BranchProbability FProb, bool InvertCond); - bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); + bool ShouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases); bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); @@ -733,7 +546,7 @@ public: SDValue Op); void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI, - ImmutableCallSite CS, unsigned ArgIdx, + const CallBase *Call, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy, bool IsPatchPoint); @@ -741,7 +554,7 @@ public: lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB = nullptr); - /// UpdateSplitBlock - When an MBB was split during scheduling, update the + /// When an MBB was split during scheduling, update the /// references that need to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); @@ -797,13 +610,13 @@ public: void LowerStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB = nullptr); - void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee, + void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB); void LowerDeoptimizeCall(const CallInst *CI); void LowerDeoptimizingReturn(); - void LowerCallSiteWithDeoptBundleImpl(ImmutableCallSite CS, SDValue Callee, + void LowerCallSiteWithDeoptBundleImpl(const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB, bool VarArgDisallowed, bool ForceVoidReturnTy); @@ -833,25 +646,24 @@ private: BranchProbability Prob = BranchProbability::getUnknown()); public: - void visitSwitchCase(CaseBlock &CB, - MachineBasicBlock *SwitchBB); + void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB); void visitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineBasicBlock *ParentBB); void visitSPDescriptorFailure(StackProtectorDescriptor &SPD); - void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); - void visitBitTestCase(BitTestBlock &BB, - MachineBasicBlock* NextMBB, - BranchProbability BranchProbToNext, - unsigned Reg, - BitTestCase &B, - MachineBasicBlock *SwitchBB); - void visitJumpTable(JumpTable &JT); - void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, + void visitBitTestHeader(SwitchCG::BitTestBlock &B, + MachineBasicBlock *SwitchBB); + void visitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB, + BranchProbability BranchProbToNext, unsigned Reg, + SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB); + void visitJumpTable(SwitchCG::JumpTable &JT); + void visitJumpTableHeader(SwitchCG::JumpTable &JT, + SwitchCG::JumpTableHeader &JTH, MachineBasicBlock *SwitchBB); private: // These all get lowered before this pass. 
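The dangling-debug-info declarations above (DanglingDebugInfoMap, salvageUnresolvedDbgValue, resolveOrClearDbgInfo) implement a defer-then-resolve scheme: dbg.value records seen before their operand is lowered are parked, emitted once the SDValue exists, and turned into undef locations if the block ends first. A hypothetical miniature of that flow; the types and names here are stand-ins, not LLVM API:

    #include <functional>
    #include <map>
    #include <string>
    #include <vector>

    struct PendingDbgValue { std::string Variable; unsigned Order; };
    using Emitter = std::function<void(const PendingDbgValue &)>;

    class DanglingTracker {
      std::map<const void *, std::vector<PendingDbgValue>> Pending;

    public:
      // dbg.value seen before its operand was lowered: park it.
      void defer(const void *V, PendingDbgValue DV) {
        Pending[V].push_back(std::move(DV));
      }

      // The operand's SDValue now exists: emit everything parked on it.
      void resolve(const void *V, const Emitter &Emit) {
        auto It = Pending.find(V);
        if (It == Pending.end())
          return;
        for (const PendingDbgValue &DV : It->second)
          Emit(DV);
        Pending.erase(It);
      }

      // End of the block: whatever is still parked becomes an undef
      // location (after a salvage attempt, in the real code).
      void resolveOrClear(const Emitter &EmitUndef) {
        for (auto &Entry : Pending)
          for (const PendingDbgValue &DV : Entry.second)
            EmitUndef(DV);
        Pending.clear();
      }
    };
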
void visitInvoke(const InvokeInst &I); + void visitCallBr(const CallBrInst &I); void visitResume(const ResumeInst &I); void visitUnary(const User &I, unsigned Opcode); @@ -932,7 +744,7 @@ private: void visitStoreToSwiftError(const StoreInst &I); void visitInlineAsm(ImmutableCallSite CS); - const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); + void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); @@ -982,9 +794,12 @@ private: SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable, DIExpression *Expr, const DebugLoc &dl, unsigned DbgSDNodeOrder); + + /// Lowers CallInst to an external symbol. + void lowerCallToExternalSymbol(const CallInst &I, const char *FunctionName); }; -/// RegsForValue - This struct represents the registers (physical or virtual) +/// This struct represents the registers (physical or virtual) /// that a particular set of values is assigned, and the type information about /// the value. The most common situation is to represent one value at a time, /// but struct or array values are handled element-wise as multiple values. The diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 43df2abb674b..da3049881d31 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -1,9 +1,8 @@ //===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -96,6 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd"; case ISD::ATOMIC_LOAD: return "AtomicLoad"; case ISD::ATOMIC_STORE: return "AtomicStore"; case ISD::PCMARKER: return "PCMarker"; @@ -145,6 +145,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); if (IID < Intrinsic::num_intrinsics) return Intrinsic::getName((Intrinsic::ID)IID, None); + else if (!G) + return "Unknown intrinsic"; else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) return TII->getName(IID); llvm_unreachable("Invalid intrinsic ID"); @@ -170,7 +172,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UNDEF: return "undef"; case ISD::MERGE_VALUES: return "merge_values"; case ISD::INLINEASM: return "inlineasm"; + case ISD::INLINEASM_BR: return "inlineasm_br"; case ISD::EH_LABEL: return "eh_label"; + case ISD::ANNOTATION_LABEL: return "annotation_label"; case ISD::HANDLENODE: return "handlenode"; // Unary operators @@ -297,7 +301,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UADDSAT: return "uaddsat"; case ISD::SSUBSAT: return "ssubsat"; case ISD::USUBSAT: return "usubsat"; + case ISD::SMULFIX: return "smulfix"; + case ISD::SMULFIXSAT: return "smulfixsat"; + case ISD::UMULFIX: return "umulfix"; // Conversion operators. case ISD::SIGN_EXTEND: return "sign_extend"; @@ -309,9 +316,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; + case ISD::STRICT_FP_ROUND: return "strict_fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; case ISD::FP_ROUND_INREG: return "fp_round_inreg"; case ISD::FP_EXTEND: return "fp_extend"; + case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; @@ -321,6 +330,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; + case ISD::LROUND: return "lround"; + case ISD::LLROUND: return "llround"; + case ISD::LRINT: return "lrint"; + case ISD::LLRINT: return "llrint"; // Control flow instructions case ISD::BR: return "br"; @@ -650,6 +663,36 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << ", " << AM; OS << ">"; + } else if (const MaskedLoadSDNode *MLd = dyn_cast<MaskedLoadSDNode>(this)) { + OS << "<"; + + printMemOperand(OS, *MLd->getMemOperand(), G); + + bool doExt = true; + switch (MLd->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << MLd->getMemoryVT().getEVTString(); + + if (MLd->isExpandingLoad()) + OS << ", expanding"; + + OS << ">"; + } else if (const MaskedStoreSDNode *MSt = dyn_cast<MaskedStoreSDNode>(this)) { + OS << "<"; 
+ printMemOperand(OS, *MSt->getMemOperand(), G); + + if (MSt->isTruncatingStore()) + OS << ", trunc to " << MSt->getMemoryVT().getEVTString(); + + if (MSt->isCompressingStore()) + OS << ", compressing"; + + OS << ">"; } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { OS << "<"; printMemOperand(OS, *M->getMemOperand(), G); @@ -675,6 +718,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { << " -> " << ASC->getDestAddressSpace() << ']'; + } else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) { + if (LN->hasOffset()) + OS << "<" << LN->getOffset() << " to " << LN->getOffset() + LN->getSize() << ">"; } if (VerboseDAGDumping) { @@ -684,45 +730,63 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getNodeId() != -1) OS << " [ID=" << getNodeId() << ']'; if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this)))) - OS << "# D:" << isDivergent(); - - if (!G) - return; - - DILocation *L = getDebugLoc(); - if (!L) - return; - - if (auto *Scope = L->getScope()) - OS << Scope->getFilename(); - else - OS << "<unknown>"; - OS << ':' << L->getLine(); - if (unsigned C = L->getColumn()) - OS << ':' << C; - - for (SDDbgValue *Dbg : G->GetDbgValues(this)) { - if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated()) - continue; - Dbg->dump(OS); - } + OS << " # D:" << isDivergent(); + + if (G && !G->GetDbgValues(this).empty()) { + OS << " [NoOfDbgValues=" << G->GetDbgValues(this).size() << ']'; + for (SDDbgValue *Dbg : G->GetDbgValues(this)) + if (!Dbg->isInvalidated()) + Dbg->print(OS); + } else if (getHasDebugValue()) + OS << " [NoOfDbgValues>0]"; } } -LLVM_DUMP_METHOD void SDDbgValue::dump(raw_ostream &OS) const { - OS << " DbgVal"; - if (kind==SDNODE) - OS << '(' << u.s.ResNo << ')'; - OS << ":\"" << Var->getName() << '"'; +LLVM_DUMP_METHOD void SDDbgValue::print(raw_ostream &OS) const { + OS << " DbgVal(Order=" << getOrder() << ')'; + if (isInvalidated()) OS << "(Invalidated)"; + if (isEmitted()) OS << "(Emitted)"; + switch (getKind()) { + case SDNODE: + if (getSDNode()) + OS << "(SDNODE=" << PrintNodeId(*getSDNode()) << ':' << getResNo() << ')'; + else + OS << "(SDNODE)"; + break; + case CONST: + OS << "(CONST)"; + break; + case FRAMEIX: + OS << "(FRAMEIX=" << getFrameIx() << ')'; + break; + case VREG: + OS << "(VREG=" << getVReg() << ')'; + break; + } + if (isIndirect()) OS << "(Indirect)"; + OS << ":\"" << Var->getName() << '"'; #ifndef NDEBUG - if (Expr->getNumElements()) - Expr->dump(); + if (Expr->getNumElements()) + Expr->dump(); #endif } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void SDDbgValue::dump() const { + if (isInvalidated()) + return; + print(dbgs()); + dbgs() << "\n"; +} +#endif + /// Return true if this node is so simple that we should just print it inline /// if it appears as an operand. -static bool shouldPrintInline(const SDNode &Node) { +static bool shouldPrintInline(const SDNode &Node, const SelectionDAG *G) { + // Avoid lots of cluttering when inline printing nodes with associated + // DbgValues in verbose mode. 
+ if (VerboseDAGDumping && G && !G->GetDbgValues(&Node).empty()) + return false; if (Node.getOpcode() == ISD::EntryToken) return false; return Node.getNumOperands() == 0; @@ -731,7 +795,7 @@ static bool shouldPrintInline(const SDNode &Node) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { for (const SDValue &Op : N->op_values()) { - if (shouldPrintInline(*Op.getNode())) + if (shouldPrintInline(*Op.getNode(), G)) continue; if (Op.getNode()->hasOneUse()) DumpNodes(Op.getNode(), indent+2, G); @@ -748,12 +812,24 @@ LLVM_DUMP_METHOD void SelectionDAG::dump() const { I != E; ++I) { const SDNode *N = &*I; if (!N->hasOneUse() && N != getRoot().getNode() && - (!shouldPrintInline(*N) || N->use_empty())) + (!shouldPrintInline(*N, this) || N->use_empty())) DumpNodes(N, 2, this); } if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); - dbgs() << "\n\n"; + dbgs() << "\n"; + + if (VerboseDAGDumping) { + if (DbgBegin() != DbgEnd()) + dbgs() << "SDDbgValues:\n"; + for (auto *Dbg : make_range(DbgBegin(), DbgEnd())) + Dbg->dump(); + if (ByvalParmDbgBegin() != ByvalParmDbgEnd()) + dbgs() << "Byval SDDbgValues:\n"; + for (auto *Dbg : make_range(ByvalParmDbgBegin(), ByvalParmDbgEnd())) + Dbg->dump(); + } + dbgs() << "\n"; } #endif @@ -769,7 +845,7 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G, if (!Value.getNode()) { OS << "<null>"; return false; - } else if (shouldPrintInline(*Value.getNode())) { + } else if (shouldPrintInline(*Value.getNode(), G)) { OS << Value->getOperationName(G) << ':'; Value->print_types(OS, G); Value->print_details(OS, G); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index af5c2433fa2f..bdf9f2c166e1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1,9 +1,8 @@ //===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -42,6 +41,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -49,6 +49,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -63,6 +64,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -306,8 +308,9 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()), + SwiftError(new SwiftErrorValueTracking()), CurDAG(new SelectionDAG(tm, OL)), - SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), + SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, *SwiftError, OL)), AA(), GFI(), OptLevel(OL), DAGSize(0) { @@ -323,6 +326,7 @@ SelectionDAGISel::~SelectionDAGISel() { delete SDB; delete CurDAG; delete FuncInfo; + delete SwiftError; } void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { @@ -379,6 +383,30 @@ static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT, } } +static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F, + MachineModuleInfo &MMI) { + // Only needed for MSVC + if (!TT.isWindowsMSVCEnvironment()) + return; + + // If it's already set, nothing to do. + if (MMI.usesMSVCFloatingPoint()) + return; + + for (const Instruction &I : instructions(F)) { + if (I.getType()->isFPOrFPVectorTy()) { + MMI.setUsesMSVCFloatingPoint(true); + return; + } + for (const auto &Op : I.operands()) { + if (Op->getType()->isFPOrFPVectorTy()) { + MMI.setUsesMSVCFloatingPoint(true); + return; + } + } + } +} + bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // If we already selected that function, we do not need to run SDISel. if (mf.getProperties().hasProperty( @@ -421,6 +449,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CurDAG->init(*MF, *ORE, this, LibInfo, getAnalysisIfAvailable<LegacyDivergenceAnalysis>()); FuncInfo->set(Fn, *MF, CurDAG); + SwiftError->setFunction(*MF); // Now get the optional analyzes if we want to. // This is based on the possibly changed OptLevel (after optnone is taken @@ -474,6 +503,40 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { Fn.getContext().diagnose(DiagFallback); } + // Replace forward-declared registers with the registers containing + // the desired value. + // Note: it is important that this happens **before** the call to + // EmitLiveInCopies, since implementations can skip copies of unused + // registers. If we don't apply the reg fixups before, some registers may + // appear as unused and will be skipped, resulting in bad MI. 
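The RegFixups map can itself contain chains (From maps to To, and To is itself scheduled for replacement), which is why the loop that follows re-queries the map until the target is final. The chasing in isolation, on a plain std::map (a hypothetical helper; the in-tree loop additionally constrains the register class and clears kill flags, as shown next):

    #include <map>

    // Resolve 'From' to its ultimate replacement by walking the fixup chain.
    // Assumes the map is acyclic, as FunctionLoweringInfo::RegFixups is.
    unsigned resolveFixup(const std::map<unsigned, unsigned> &Fixups,
                          unsigned From) {
      auto I = Fixups.find(From);
      if (I == Fixups.end())
        return From;            // Not scheduled for replacement.
      unsigned To = I->second;
      while (true) {            // 'To' may itself be replaced; keep walking.
        auto J = Fixups.find(To);
        if (J == Fixups.end())
          break;
        To = J->second;
      }
      return To;
    }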
+ MachineRegisterInfo &MRI = MF->getRegInfo(); + for (DenseMap<unsigned, unsigned>::iterator I = FuncInfo->RegFixups.begin(), + E = FuncInfo->RegFixups.end(); + I != E; ++I) { + unsigned From = I->first; + unsigned To = I->second; + // If To is also scheduled to be replaced, find what its ultimate + // replacement is. + while (true) { + DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To); + if (J == E) + break; + To = J->second; + } + // Make sure the new register has a sufficiently constrained register class. + if (TargetRegisterInfo::isVirtualRegister(From) && + TargetRegisterInfo::isVirtualRegister(To)) + MRI.constrainRegClass(To, MRI.getRegClass(From)); + // Replace it. + + // Replacing one register with another won't touch the kill flags. + // We need to conservatively clear the kill flags as a kill on the old + // register might dominate existing uses of the new register. + if (!MRI.use_empty(To)) + MRI.clearKillFlags(From); + MRI.replaceRegWith(From, To); + } + // If the first basic block in the function has live ins that need to be // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. @@ -507,7 +570,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; bool hasFI = MI->getOperand(0).isFI(); - unsigned Reg = + Register Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); @@ -590,9 +653,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Determine if there is a call to setjmp in the machine function. MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); + // Determine if floating point is used for msvc + computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI()); + // Replace forward-declared registers with the registers containing // the desired value. - MachineRegisterInfo &MRI = MF->getRegInfo(); for (DenseMap<unsigned, unsigned>::iterator I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); I != E; ++I) { @@ -663,6 +728,7 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, // Make sure the root of the DAG is up-to-date. CurDAG->setRoot(SDB->getControlRoot()); HadTailCall = SDB->HasTailCall; + SDB->resolveOrClearDbgInfo(); SDB->clear(); // Final step, emit the lowered DAG as machine code. @@ -713,8 +779,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { StringRef GroupName = "sdag"; StringRef GroupDescription = "Instruction Selection and Scheduling"; std::string BlockName; - int BlockNumber = -1; - (void)BlockNumber; bool MatchFilterBB = false; (void)MatchFilterBB; #ifndef NDEBUG TargetTransformInfo &TTI = @@ -735,7 +799,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ViewSUnitDAGs) #endif { - BlockNumber = FuncInfo->MBB->getNumber(); BlockName = (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } @@ -1092,16 +1155,14 @@ void SelectionDAGISel::DoInstructionSelection() { #endif // When we are using non-default rounding modes or FP exception behavior - // FP operations are represented by StrictFP pseudo-operations. They - // need to be simplified here so that the target-specific instruction - // selectors know how to handle them. 
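computeUsesMSVCFloatingPoint, defined and called above, is a short-circuit scan: the first floating-point result or operand found anywhere in the function sets the flag and ends the walk. The scan's shape with the triple check and MMI plumbing stripped out (a sketch, not the in-tree helper):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstIterator.h"

    using namespace llvm;

    // True if any instruction produces or consumes an FP (vector) value.
    static bool functionTouchesFP(const Function &F) {
      for (const Instruction &I : instructions(F)) {
        if (I.getType()->isFPOrFPVectorTy())
          return true;
        for (const auto &Op : I.operands())
          if (Op->getType()->isFPOrFPVectorTy())
            return true;
      }
      return false;
    }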
- // - // If the current node is a strict FP pseudo-op, the isStrictFPOp() - // function will provide the corresponding normal FP opcode to which the - // node should be mutated. - // - // FIXME: The backends need a way to handle FP constraints. - if (Node->isStrictFPOpcode()) + // FP operations are represented by StrictFP pseudo-operations. For + // targets that do not (yet) understand strict FP operations directly, + // we convert them to normal FP opcodes instead at this point. This + // will allow them to be handled by existing target-specific instruction + // selectors. + if (Node->isStrictFPOpcode() && + (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0)) + != TargetLowering::Legal)) Node = CurDAG->mutateStrictFPToFP(Node); LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: "; @@ -1228,77 +1289,6 @@ static bool isFoldedOrDeadInstruction(const Instruction *I, !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } -/// Set up SwiftErrorVals by going through the function. If the function has -/// swifterror argument, it will be the first entry. -static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, - FunctionLoweringInfo *FuncInfo) { - if (!TLI->supportSwiftError()) - return; - - FuncInfo->SwiftErrorVals.clear(); - FuncInfo->SwiftErrorVRegDefMap.clear(); - FuncInfo->SwiftErrorVRegUpwardsUse.clear(); - FuncInfo->SwiftErrorVRegDefUses.clear(); - FuncInfo->SwiftErrorArg = nullptr; - - // Check if function has a swifterror argument. - bool HaveSeenSwiftErrorArg = false; - for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end(); - AI != AE; ++AI) - if (AI->hasSwiftErrorAttr()) { - assert(!HaveSeenSwiftErrorArg && - "Must have only one swifterror parameter"); - (void)HaveSeenSwiftErrorArg; // silence warning. - HaveSeenSwiftErrorArg = true; - FuncInfo->SwiftErrorArg = &*AI; - FuncInfo->SwiftErrorVals.push_back(&*AI); - } - - for (const auto &LLVMBB : Fn) - for (const auto &Inst : LLVMBB) { - if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst)) - if (Alloca->isSwiftError()) - FuncInfo->SwiftErrorVals.push_back(Alloca); - } -} - -static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo, - FastISel *FastIS, - const TargetLowering *TLI, - const TargetInstrInfo *TII, - SelectionDAGBuilder *SDB) { - if (!TLI->supportSwiftError()) - return; - - // We only need to do this when we have swifterror parameter or swifterror - // alloc. - if (FuncInfo->SwiftErrorVals.empty()) - return; - - assert(FuncInfo->MBB == &*FuncInfo->MF->begin() && - "expected to insert into entry block"); - auto &DL = FuncInfo->MF->getDataLayout(); - auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); - for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) { - // We will always generate a copy from the argument. It is always used at - // least by the 'return' of the swifterror. - if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal) - continue; - unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); - // Assign Undef to Vreg. We construct MI directly to make sure it works - // with FastISel. - BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(), - SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), - VReg); - - // Keep FastIS informed about the value we just inserted. 
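Returning to the strict-FP hunk above: the rewritten guard leaves strict nodes intact when the target marked the strict opcode Legal, and only mutates them to ordinary FP opcodes otherwise. Isolated as a helper, the decision looks like this (a sketch around real TargetLowering and SelectionDAG entry points, with the surrounding selection loop elided):

    // A strict FP node survives to target selection only if the strict
    // opcode itself is Legal for the node's result type.
    static SDNode *maybeMutateStrictFP(SDNode *Node, const TargetLowering &TLI,
                                       SelectionDAG &DAG) {
      if (Node->isStrictFPOpcode() &&
          TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)) !=
              TargetLowering::Legal)
        return DAG.mutateStrictFPToFP(Node);
      return Node;
    }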
- if (FastIS) - FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); - - FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg); - } -} - /// Collect llvm.dbg.declare information. This is done after argument lowering /// in case the declarations refer to arguments. static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { @@ -1337,202 +1327,13 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { DIExpression *Expr = DI->getExpression(); if (Offset.getBoolValue()) - Expr = DIExpression::prepend(Expr, DIExpression::NoDeref, + Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, Offset.getZExtValue()); MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc()); } } } -/// Propagate swifterror values through the machine function CFG. -static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { - auto *TLI = FuncInfo->TLI; - if (!TLI->supportSwiftError()) - return; - - // We only need to do this when we have swifterror parameter or swifterror - // alloc. - if (FuncInfo->SwiftErrorVals.empty()) - return; - - // For each machine basic block in reverse post order. - ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF); - for (MachineBasicBlock *MBB : RPOT) { - // For each swifterror value in the function. - for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) { - auto Key = std::make_pair(MBB, SwiftErrorVal); - auto UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key); - auto VRegDefIt = FuncInfo->SwiftErrorVRegDefMap.find(Key); - bool UpwardsUse = UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end(); - unsigned UUseVReg = UpwardsUse ? UUseIt->second : 0; - bool DownwardDef = VRegDefIt != FuncInfo->SwiftErrorVRegDefMap.end(); - assert(!(UpwardsUse && !DownwardDef) && - "We can't have an upwards use but no downwards def"); - - // If there is no upwards exposed use and an entry for the swifterror in - // the def map for this value we don't need to do anything: We already - // have a downward def for this basic block. - if (!UpwardsUse && DownwardDef) - continue; - - // Otherwise we either have an upwards exposed use vreg that we need to - // materialize or need to forward the downward def from predecessors. - - // Check whether we have a single vreg def from all predecessors. - // Otherwise we need a phi. - SmallVector<std::pair<MachineBasicBlock *, unsigned>, 4> VRegs; - SmallSet<const MachineBasicBlock*, 8> Visited; - for (auto *Pred : MBB->predecessors()) { - if (!Visited.insert(Pred).second) - continue; - VRegs.push_back(std::make_pair( - Pred, FuncInfo->getOrCreateSwiftErrorVReg(Pred, SwiftErrorVal))); - if (Pred != MBB) - continue; - // We have a self-edge. - // If there was no upwards use in this basic block there is now one: the - // phi needs to use it self. - if (!UpwardsUse) { - UpwardsUse = true; - UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key); - assert(UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end()); - UUseVReg = UUseIt->second; - } - } - - // We need a phi node if we have more than one predecessor with different - // downward defs. - bool needPHI = - VRegs.size() >= 1 && - std::find_if( - VRegs.begin(), VRegs.end(), - [&](const std::pair<const MachineBasicBlock *, unsigned> &V) - -> bool { return V.second != VRegs[0].second; }) != - VRegs.end(); - - // If there is no upwards exposed used and we don't need a phi just - // forward the swifterror vreg from the predecessor(s). - if (!UpwardsUse && !needPHI) { - assert(!VRegs.empty() && - "No predecessors? 
The entry block should bail out earlier"); - // Just forward the swifterror vreg from the predecessor(s). - FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, VRegs[0].second); - continue; - } - - auto DLoc = isa<Instruction>(SwiftErrorVal) - ? cast<Instruction>(SwiftErrorVal)->getDebugLoc() - : DebugLoc(); - const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo(); - - // If we don't need a phi create a copy to the upward exposed vreg. - if (!needPHI) { - assert(UpwardsUse); - assert(!VRegs.empty() && - "No predecessors? Is the Calling Convention correct?"); - unsigned DestReg = UUseVReg; - BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY), - DestReg) - .addReg(VRegs[0].second); - continue; - } - - // We need a phi: if there is an upwards exposed use we already have a - // destination virtual register number otherwise we generate a new one. - auto &DL = FuncInfo->MF->getDataLayout(); - auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); - unsigned PHIVReg = - UpwardsUse ? UUseVReg - : FuncInfo->MF->getRegInfo().createVirtualRegister(RC); - MachineInstrBuilder SwiftErrorPHI = - BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, - TII->get(TargetOpcode::PHI), PHIVReg); - for (auto BBRegPair : VRegs) { - SwiftErrorPHI.addReg(BBRegPair.second).addMBB(BBRegPair.first); - } - - // We did not have a definition in this block before: store the phi's vreg - // as this block downward exposed def. - if (!UpwardsUse) - FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, PHIVReg); - } - } -} - -static void preassignSwiftErrorRegs(const TargetLowering *TLI, - FunctionLoweringInfo *FuncInfo, - BasicBlock::const_iterator Begin, - BasicBlock::const_iterator End) { - if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty()) - return; - - // Iterator over instructions and assign vregs to swifterror defs and uses. - for (auto It = Begin; It != End; ++It) { - ImmutableCallSite CS(&*It); - if (CS) { - // A call-site with a swifterror argument is both use and def. - const Value *SwiftErrorAddr = nullptr; - for (auto &Arg : CS.args()) { - if (!Arg->isSwiftError()) - continue; - // Use of swifterror. - assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments"); - SwiftErrorAddr = &*Arg; - assert(SwiftErrorAddr->isSwiftError() && - "Must have a swifterror value argument"); - unsigned VReg; bool CreatedReg; - std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt( - &*It, FuncInfo->MBB, SwiftErrorAddr); - assert(CreatedReg); - } - if (!SwiftErrorAddr) - continue; - - // Def of swifterror. - unsigned VReg; bool CreatedReg; - std::tie(VReg, CreatedReg) = - FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It); - assert(CreatedReg); - FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg); - - // A load is a use. - } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) { - const Value *V = LI->getOperand(0); - if (!V->isSwiftError()) - continue; - - unsigned VReg; bool CreatedReg; - std::tie(VReg, CreatedReg) = - FuncInfo->getOrCreateSwiftErrorVRegUseAt(LI, FuncInfo->MBB, V); - assert(CreatedReg); - - // A store is a def. - } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) { - const Value *SwiftErrorAddr = SI->getOperand(1); - if (!SwiftErrorAddr->isSwiftError()) - continue; - - // Def of swifterror. 
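In the swifterror propagation above (being moved wholesale into SwiftErrorValueTracking), the phi-placement test reduces to: collect the incoming vreg from each predecessor and emit a PHI only if they disagree. In miniature, with plain std types standing in for the MachineBasicBlock machinery:

    #include <algorithm>
    #include <utility>
    #include <vector>

    // One entry per predecessor: (block id, swifterror vreg leaving it).
    using PredVRegs = std::vector<std::pair<int, unsigned>>;

    // A PHI is needed iff some predecessor supplies a different vreg than
    // the first one; otherwise the single incoming vreg is just forwarded.
    bool needsPHI(const PredVRegs &VRegs) {
      return !VRegs.empty() &&
             std::any_of(VRegs.begin(), VRegs.end(),
                         [&](const std::pair<int, unsigned> &V) {
                           return V.second != VRegs[0].second;
                         });
    }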
- unsigned VReg; bool CreatedReg; - std::tie(VReg, CreatedReg) = - FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It); - assert(CreatedReg); - FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg); - - // A return in a swiferror returning function is a use. - } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) { - const Function *F = R->getParent()->getParent(); - if(!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) - continue; - - unsigned VReg; bool CreatedReg; - std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt( - R, FuncInfo->MBB, FuncInfo->SwiftErrorArg); - assert(CreatedReg); - } - } -} - void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastISelFailed = false; // Initialize the Fast-ISel state, if needed. @@ -1542,8 +1343,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS = TLI->createFastISel(*FuncInfo, LibInfo); } - setupSwiftErrorVals(Fn, TLI, FuncInfo); - ReversePostOrderTraversal<const Function*> RPOT(&Fn); // Lower arguments up front. An RPO iteration always visits the entry block @@ -1589,7 +1388,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { else FastIS->setLastLocalValue(nullptr); } - createSwiftErrorEntriesInEntryBlock(FuncInfo, FastIS, TLI, TII, SDB); + + bool Inserted = SwiftError->createEntriesInEntryBlock(SDB->getCurDebugLoc()); + + if (FastIS && Inserted) + FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); processDbgDeclares(FuncInfo); @@ -1644,7 +1447,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { unsigned NumFastIselRemaining = std::distance(Begin, End); // Pre-assign swifterror vregs. - preassignSwiftErrorRegs(TLI, FuncInfo, Begin, End); + SwiftError->preassignVRegs(FuncInfo->MBB, Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { @@ -1692,7 +1495,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // to keep track of gc-relocates for a particular gc-statepoint. This is // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before // visitGCRelocate. - if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst)) { + if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst) && + !isGCResult(Inst)) { OptimizationRemarkMissed R("sdagisel", "FastISelFailure", Inst->getDebugLoc(), LLVMBB); @@ -1712,7 +1516,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { !Inst->use_empty()) { unsigned &R = FuncInfo->ValueMap[Inst]; if (!R) - R = FuncInfo->CreateRegs(Inst->getType()); + R = FuncInfo->CreateRegs(Inst); } bool HadTailCall = false; @@ -1799,7 +1603,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { SP.copyToMachineFrameInfo(MF->getFrameInfo()); - propagateSwiftErrorVRegs(FuncInfo); + SwiftError->propagateVRegs(); delete FastIS; SDB->clearDanglingDebugInfo(); @@ -1969,7 +1773,7 @@ SelectionDAGISel::FinishBasicBlock() { } // Lower each BitTestBlock. - for (auto &BTB : SDB->BitTestCases) { + for (auto &BTB : SDB->SL->BitTestCases) { // Lower header first, if it wasn't already lowered if (!BTB.Emitted) { // Set the current basic block to the mbb we wish to insert the code into @@ -2050,30 +1854,30 @@ SelectionDAGISel::FinishBasicBlock() { } } } - SDB->BitTestCases.clear(); + SDB->SL->BitTestCases.clear(); // If the JumpTable record is filled in, then we need to emit a jump table. 
// Updating the PHI nodes is tricky in this case, since we need to determine // whether the PHI is a successor of the range check MBB or the jump table MBB - for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) { + for (unsigned i = 0, e = SDB->SL->JTCases.size(); i != e; ++i) { // Lower header first, if it wasn't already lowered - if (!SDB->JTCases[i].first.Emitted) { + if (!SDB->SL->JTCases[i].first.Emitted) { // Set the current basic block to the mbb we wish to insert the code into - FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB; + FuncInfo->MBB = SDB->SL->JTCases[i].first.HeaderBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, - FuncInfo->MBB); + SDB->visitJumpTableHeader(SDB->SL->JTCases[i].second, + SDB->SL->JTCases[i].first, FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); } // Set the current basic block to the mbb we wish to insert the code into - FuncInfo->MBB = SDB->JTCases[i].second.MBB; + FuncInfo->MBB = SDB->SL->JTCases[i].second.MBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - SDB->visitJumpTable(SDB->JTCases[i].second); + SDB->visitJumpTable(SDB->SL->JTCases[i].second); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); @@ -2086,31 +1890,31 @@ SelectionDAGISel::FinishBasicBlock() { assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. - if (PHIBB == SDB->JTCases[i].second.Default) + if (PHIBB == SDB->SL->JTCases[i].second.Default) PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) - .addMBB(SDB->JTCases[i].first.HeaderBB); + .addMBB(SDB->SL->JTCases[i].first.HeaderBB); // JT BB. Just iterate over successors here if (FuncInfo->MBB->isSuccessor(PHIBB)) PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB); } } - SDB->JTCases.clear(); + SDB->SL->JTCases.clear(); // If we generated any switch lowering information, build and codegen any // additional DAGs necessary. - for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { + for (unsigned i = 0, e = SDB->SL->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into - FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; + FuncInfo->MBB = SDB->SL->SwitchCases[i].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Determine the unique successors. SmallVector<MachineBasicBlock *, 2> Succs; - Succs.push_back(SDB->SwitchCases[i].TrueBB); - if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB) - Succs.push_back(SDB->SwitchCases[i].FalseBB); + Succs.push_back(SDB->SL->SwitchCases[i].TrueBB); + if (SDB->SL->SwitchCases[i].TrueBB != SDB->SL->SwitchCases[i].FalseBB) + Succs.push_back(SDB->SL->SwitchCases[i].FalseBB); // Emit the code. Note that this could result in FuncInfo->MBB being split. - SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); + SDB->visitSwitchCase(SDB->SL->SwitchCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); @@ -2146,7 +1950,7 @@ SelectionDAGISel::FinishBasicBlock() { } } } - SDB->SwitchCases.clear(); + SDB->SL->SwitchCases.clear(); } /// Create the scheduler. 
If a specific scheduler was specified @@ -2413,14 +2217,14 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, return !findNonImmUse(Root, N.getNode(), U, IgnoreChains); } -void SelectionDAGISel::Select_INLINEASM(SDNode *N) { +void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) { SDLoc DL(N); std::vector<SDValue> Ops(N->op_begin(), N->op_end()); SelectInlineAsmMemoryOperands(Ops, DL); const EVT VTs[] = {MVT::Other, MVT::Glue}; - SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops); + SDValue New = CurDAG->getNode(Branch ? ISD::INLINEASM_BR : ISD::INLINEASM, DL, VTs, Ops); New->setNodeId(-1); ReplaceUses(N, New.getNode()); CurDAG->RemoveDeadNode(N); @@ -2728,6 +2532,14 @@ CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, } LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N) { + if (2 >= N.getNumOperands()) + return false; + return ::CheckCondCode(MatcherTable, MatcherIndex, N.getOperand(2)); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; @@ -2842,6 +2654,9 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckCondCode: Result = !::CheckCondCode(Table, Index, N); return Index; + case SelectionDAGISel::OPC_CheckChild2CondCode: + Result = !::CheckChild2CondCode(Table, Index, N); + return Index; case SelectionDAGISel::OPC_CheckValueType: Result = !::CheckValueType(Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout()); @@ -2970,7 +2785,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, CurDAG->RemoveDeadNode(NodeToMatch); return; case ISD::INLINEASM: - Select_INLINEASM(NodeToMatch); + case ISD::INLINEASM_BR: + Select_INLINEASM(NodeToMatch, + NodeToMatch->getOpcode() == ISD::INLINEASM_BR); return; case ISD::READ_REGISTER: Select_READ_REGISTER(NodeToMatch); @@ -3328,6 +3145,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case OPC_CheckCondCode: if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; continue; + case OPC_CheckChild2CondCode: + if (!::CheckChild2CondCode(MatcherTable, MatcherIndex, N)) break; + continue; case OPC_CheckValueType: if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI, CurDAG->getDataLayout())) @@ -3348,6 +3168,12 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case OPC_CheckOrImm: if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; continue; + case OPC_CheckImmAllOnesV: + if (!ISD::isBuildVectorAllOnes(N.getNode())) break; + continue; + case OPC_CheckImmAllZerosV: + if (!ISD::isBuildVectorAllZeros(N.getNode())) break; + continue; case OPC_CheckFoldableChainNode: { assert(NodeStack.size() != 1 && "No parent node"); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 3b19bff4743d..cdc09d59f6a4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -1,9 +1,8 @@ //===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp index 3a283bc5fdc0..3a2df6f60593 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp @@ -1,9 +1,8 @@ //===- SelectionDAGTargetInfo.cpp - SelectionDAG Info ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 90a1b350fc94..395e9a8a4fc5 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -1,9 +1,8 @@ //===- StatepointLowering.cpp - SDAGBuilder's statepoint code -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -348,16 +347,28 @@ static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo( return std::make_pair(ReturnValue, CallEnd->getOperand(0).getNode()); } +static MachineMemOperand* getMachineMemOperand(MachineFunction &MF, + FrameIndexSDNode &FI) { + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI.getIndex()); + auto MMOFlags = MachineMemOperand::MOStore | + MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; + auto &MFI = MF.getFrameInfo(); + return MF.getMachineMemOperand(PtrInfo, MMOFlags, + MFI.getObjectSize(FI.getIndex()), + MFI.getObjectAlignment(FI.getIndex())); +} + /// Spill a value incoming to the statepoint. It might be either part of /// vmstate /// or gcstate. In both cases unconditionally spill it on the stack unless it /// is a null constant. 
Return a tuple: the first element is the frame index
/// containing the saved value, the second is the outgoing chain from the
/// emitted store, and the third is the MachineMemOperand for the spill
/// slot (null if no new store was emitted)
-static std::pair<SDValue, SDValue>
+static std::tuple<SDValue, SDValue, MachineMemOperand*>
 spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
                              SelectionDAGBuilder &Builder) {
   SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
+  MachineMemOperand* MMO = nullptr;

   // Emit new store if we didn't do it for this ptr before
   if (!Loc.getNode()) {
@@ -367,10 +378,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
     // We use TargetFrameIndex so that isel will not select it into LEA
     Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());

-    // TODO: We can create TokenFactor node instead of
-    //       chaining stores one after another, this may allow
-    //       a bit more optimal scheduling for them
-
 #ifndef NDEBUG
     // Right now we always allocate spill slots that are of the same
     // size as the value we're about to spill (the size of spillee can
@@ -382,15 +389,18 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
            "Bad spill: stack slot does not match!");
 #endif

+    auto &MF = Builder.DAG.getMachineFunction();
+    auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
     Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
-                                 MachinePointerInfo::getFixedStack(
-                                     Builder.DAG.getMachineFunction(), Index));
+                                 PtrInfo);
+    MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
+
     Builder.StatepointLowering.setLocation(Incoming, Loc);
   }

   assert(Loc.getNode());
-  return std::make_pair(Loc, Chain);
+  return std::make_tuple(Loc, Chain, MMO);
 }

 /// Lower a single value incoming to a statepoint node. This value can be
@@ -398,7 +408,11 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
 /// case constants and allocas, then fall back to spilling if required.
 static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
                                          SmallVectorImpl<SDValue> &Ops,
+                                         SmallVectorImpl<MachineMemOperand*> &MemRefs,
                                          SelectionDAGBuilder &Builder) {
+  // Note: We know all of these spills are independent, but don't bother to
+  // exploit that chain wise. DAGCombine will happily do so as needed, so
+  // doing it here would be a small compile time win at most.
   SDValue Chain = Builder.getRoot();

   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
@@ -417,6 +431,11 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
            "Incoming value is a frame index!");
     Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
                                                   Builder.getFrameIndexTy()));
+
+    auto &MF = Builder.DAG.getMachineFunction();
+    auto *MMO = getMachineMemOperand(MF, *FI);
+    MemRefs.push_back(MMO);
+
   } else if (LiveInOnly) {
     // If this value is live in (not live-on-return, or live-through), we can
     // treat it the same way patchpoint treats its "live in" values. We'll
@@ -433,8 +452,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
     // need to be optional since it requires a lot of complexity on the
     // runtime side which not all would support.
     auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder);
-    Ops.push_back(Res.first);
-    Chain = Res.second;
+    Ops.push_back(std::get<0>(Res));
+    if (auto *MMO = std::get<2>(Res))
+      MemRefs.push_back(MMO);
+    Chain = std::get<1>(Res);
   }

   Builder.DAG.setRoot(Chain);
@@ -449,7 +470,7 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
 /// will be set to the last value spilled (if any were).
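A stylistic note on consuming the new tuple: under C++17 the std::get chain above could be written with structured bindings. Illustrative only; whether the tree's baseline toolchain permits this is a separate question:

    // Hypothetical C++17 spelling of the consumer hunk above.
    auto [Loc, NewChain, MMO] =
        spillIncomingStatepointValue(Incoming, Chain, Builder);
    Ops.push_back(Loc);
    if (MMO)
      MemRefs.push_back(MMO);
    Chain = NewChain;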
static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, - SelectionDAGBuilder::StatepointLoweringInfo &SI, + SmallVectorImpl<MachineMemOperand*> &MemRefs, SelectionDAGBuilder::StatepointLoweringInfo &SI, SelectionDAGBuilder &Builder) { // Lower the deopt and gc arguments for this statepoint. Layout will be: // deopt argument length, deopt arguments.., gc arguments... @@ -533,7 +554,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, if (!Incoming.getNode()) Incoming = Builder.getValue(V); const bool LiveInValue = LiveInDeopt && !isGCValue(V); - lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder); + lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, MemRefs, Builder); } // Finally, go ahead and lower all the gc arguments. There's no prefixed @@ -544,11 +565,11 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, for (unsigned i = 0; i < SI.Bases.size(); ++i) { const Value *Base = SI.Bases[i]; lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false, - Ops, Builder); + Ops, MemRefs, Builder); const Value *Ptr = SI.Ptrs[i]; lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false, - Ops, Builder); + Ops, MemRefs, Builder); } // If there are any explicit spill slots passed to the statepoint, record @@ -564,6 +585,10 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, "Incoming value is a frame index!"); Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), Builder.getFrameIndexTy())); + + auto &MF = Builder.DAG.getMachineFunction(); + auto *MMO = getMachineMemOperand(MF, *FI); + MemRefs.push_back(MMO); } } @@ -630,7 +655,8 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( // Lower statepoint vmstate and gcstate arguments SmallVector<SDValue, 10> LoweredMetaArgs; - lowerStatepointMetaArgs(LoweredMetaArgs, SI, *this); + SmallVector<MachineMemOperand*, 16> MemRefs; + lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this); // Now that we've emitted the spills, we need to update the root so that the // call sequence is ordered correctly. @@ -746,8 +772,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( // input. This allows someone else to chain off us as needed. 
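Condensed, the shape of the LowerAsSTATEPOINT change is: lowering now returns the collected MachineMemOperands alongside the operands, and they are attached to the machine node in a single call, so MachineIR-level analyses see that the STATEPOINT reads and writes its spill slots (real SelectionDAG APIs, surrounding code elided and variable names shortened):

    SmallVector<SDValue, 10> Ops;
    SmallVector<MachineMemOperand *, 16> MemRefs;
    lowerStatepointMetaArgs(Ops, MemRefs, SI, *this);
    MachineSDNode *N = DAG.getMachineNode(
        TargetOpcode::STATEPOINT, getCurSDLoc(),
        DAG.getVTList(MVT::Other, MVT::Glue), Ops);
    DAG.setNodeMemRefs(N, MemRefs);  // Hang all MMOs on the node at once.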
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - SDNode *StatepointMCNode = - DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); + MachineSDNode *StatepointMCNode = + DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); + DAG.setNodeMemRefs(StatepointMCNode, MemRefs); SDNode *SinkNode = StatepointMCNode; @@ -799,7 +826,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( void SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) { - assert(ISP.getCallSite().getCallingConv() != CallingConv::AnyReg && + assert(ISP.getCall()->getCallingConv() != CallingConv::AnyReg && "anyregcc is not supported on statepoints!"); #ifndef NDEBUG @@ -832,7 +859,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, } StatepointLoweringInfo SI(DAG); - populateCallLoweringInfo(SI.CLI, ISP.getCallSite(), + populateCallLoweringInfo(SI.CLI, ISP.getCall(), ImmutableStatepoint::CallArgsBeginPos, ISP.getNumCallArgs(), ActualCallee, ISP.getActualReturnType(), false /* IsPatchPoint */); @@ -859,7 +886,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, const GCResultInst *GCResult = ISP.getGCResult(); Type *RetTy = ISP.getActualReturnType(); if (!RetTy->isVoidTy() && GCResult) { - if (GCResult->getParent() != ISP.getCallSite().getParent()) { + if (GCResult->getParent() != ISP.getCall()->getParent()) { // Result value will be used in a different basic block so we need to // export it now. Default exporting mechanism will not work here because // statepoint call has a different type than the actual call. It means @@ -871,7 +898,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, unsigned Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Reg, RetTy, - ISP.getCallSite().getCallingConv()); + ISP.getCall()->getCallingConv()); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); @@ -891,22 +918,22 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, } void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( - ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB, + const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB, bool VarArgDisallowed, bool ForceVoidReturnTy) { StatepointLoweringInfo SI(DAG); - unsigned ArgBeginIndex = CS.arg_begin() - CS.getInstruction()->op_begin(); + unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin(); populateCallLoweringInfo( - SI.CLI, CS, ArgBeginIndex, CS.getNumArgOperands(), Callee, - ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : CS.getType(), + SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee, + ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(), false); if (!VarArgDisallowed) - SI.CLI.IsVarArg = CS.getFunctionType()->isVarArg(); + SI.CLI.IsVarArg = Call->getFunctionType()->isVarArg(); - auto DeoptBundle = *CS.getOperandBundle(LLVMContext::OB_deopt); + auto DeoptBundle = *Call->getOperandBundle(LLVMContext::OB_deopt); unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID; - auto SD = parseStatepointDirectivesFromAttrs(CS.getAttributes()); + auto SD = parseStatepointDirectivesFromAttrs(Call->getAttributes()); SI.ID = SD.StatepointID.getValueOr(DefaultID); SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0); @@ -918,15 +945,14 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( // NB! 
The GC arguments are deliberately left empty. if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) { - const Instruction *Inst = CS.getInstruction(); - ReturnVal = lowerRangeToAssertZExt(DAG, *Inst, ReturnVal); - setValue(Inst, ReturnVal); + ReturnVal = lowerRangeToAssertZExt(DAG, *Call, ReturnVal); + setValue(Call, ReturnVal); } } void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle( - ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB) { - LowerCallSiteWithDeoptBundleImpl(CS, Callee, EHPadBB, + const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB) { + LowerCallSiteWithDeoptBundleImpl(Call, Callee, EHPadBB, /* VarArgDisallowed = */ false, /* ForceVoidReturnTy = */ false); } @@ -986,11 +1012,11 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { } SDValue SpillSlot = - DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy()); + DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy()); - // Be conservative: flush all pending loads - // TODO: Probably we can be less restrictive on this, - // it may allow more scheduling opportunities. + // Note: We know all of these reloads are independent, but don't bother to + // exploit that chain wise. DAGCombine will happily do so as needed, so + // doing it here would be a small compile time win at most. SDValue Chain = getRoot(); SDValue SpillLoad = @@ -1000,7 +1026,6 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), *DerivedPtrLocation)); - // Again, be conservative, don't emit pending loads DAG.setRoot(SpillLoad.getValue(1)); assert(SpillLoad.getNode()); diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h index 372c82a359f6..70507932681d 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -1,9 +1,8 @@ //===- StatepointLowering.h - SDAGBuilder's statepoint code ---*- C++ -*---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -67,13 +66,18 @@ public: /// before the next statepoint. If we don't see it, we'll report /// an assertion. void scheduleRelocCall(const CallInst &RelocCall) { - PendingGCRelocateCalls.push_back(&RelocCall); + // We are not interested in lowering dead instructions. + if (!RelocCall.use_empty()) + PendingGCRelocateCalls.push_back(&RelocCall); } /// Remove this gc_relocate from the list we're expecting to see /// before the next statepoint. If we weren't expecting to see /// it, we'll report an assertion. void relocCallVisited(const CallInst &RelocCall) { + // We are not interested in lowering dead instructions. 
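The point of filtering in both scheduleRelocCall and relocCallVisited is symmetry: whatever predicate keeps a dead relocate out of the pending list must also keep it from being erased later, or the balance assert fires. Schematically, with plain std types and illustrative names:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct PendingRelocs {
      std::vector<const void *> Pending;
      // Producer side: dead relocates are never recorded.
      void schedule(const void *Call, bool IsDead) {
        if (!IsDead)
          Pending.push_back(Call);
      }
      // Consumer side: must apply the same predicate before erasing.
      void visited(const void *Call, bool IsDead) {
        if (IsDead)
          return;                       // Was never scheduled.
        auto I = std::find(Pending.begin(), Pending.end(), Call);
        assert(I != Pending.end() && "Visited unexpected gcrelocate call");
        Pending.erase(I);
      }
    };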
+ if (RelocCall.use_empty()) + return; auto I = llvm::find(PendingGCRelocateCalls, &RelocCall); assert(I != PendingGCRelocateCalls.end() && "Visited unexpected gcrelocate call"); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a2f05c1e3cef..b260cd91d468 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1,9 +1,8 @@ //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -100,19 +99,22 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, /// Set CallLoweringInfo attribute flags based on a call instruction /// and called function attributes. -void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS, +void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, unsigned ArgIdx) { - IsSExt = CS->paramHasAttr(ArgIdx, Attribute::SExt); - IsZExt = CS->paramHasAttr(ArgIdx, Attribute::ZExt); - IsInReg = CS->paramHasAttr(ArgIdx, Attribute::InReg); - IsSRet = CS->paramHasAttr(ArgIdx, Attribute::StructRet); - IsNest = CS->paramHasAttr(ArgIdx, Attribute::Nest); - IsByVal = CS->paramHasAttr(ArgIdx, Attribute::ByVal); - IsInAlloca = CS->paramHasAttr(ArgIdx, Attribute::InAlloca); - IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned); - IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf); - IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError); - Alignment = CS->getParamAlignment(ArgIdx); + IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt); + IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt); + IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg); + IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet); + IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest); + IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal); + IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca); + IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned); + IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf); + IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); + Alignment = Call->getParamAlignment(ArgIdx); + ByValType = nullptr; + if (Call->paramHasAttr(ArgIdx, Attribute::ByVal)) + ByValType = Call->getParamByValType(ArgIdx); } /// Generate a libcall taking the given operands as arguments and returning a @@ -121,7 +123,8 @@ std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops, bool isSigned, const SDLoc &dl, bool doesNotReturn, - bool isReturnValueUsed) const { + bool isReturnValueUsed, + bool isPostTypeLegalization) const { TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); @@ -147,11 +150,114 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setNoReturn(doesNotReturn) .setDiscardResult(!isReturnValueUsed) + .setIsPostTypeLegalization(isPostTypeLegalization) .setSExtResult(signExtend) .setZExtResult(!signExtend); return 
LowerCallTo(CLI); } +bool +TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps, + unsigned Limit, uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, + bool ZeroMemset, + bool MemcpyStrSrc, + bool AllowOverlap, + unsigned DstAS, unsigned SrcAS, + const AttributeList &FuncAttributes) const { + // If 'SrcAlign' is zero, that means the memory operation does not need to + // load the value, i.e. memset or memcpy from constant string. Otherwise, + // it's the inferred alignment of the source. 'DstAlign', on the other hand, + // is the specified alignment of the memory operation. If it is zero, that + // means it's possible to change the alignment of the destination. + // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does + // not need to be loaded. + if (!(SrcAlign == 0 || SrcAlign >= DstAlign)) + return false; + + EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign, + IsMemset, ZeroMemset, MemcpyStrSrc, + FuncAttributes); + + if (VT == MVT::Other) { + // Use the largest integer type whose alignment constraints are satisfied. + // We only need to check DstAlign here as SrcAlign is always greater or + // equal to DstAlign (or zero). + VT = MVT::i64; + while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && + !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); + assert(VT.isInteger()); + + // Find the largest legal integer type. + MVT LVT = MVT::i64; + while (!isTypeLegal(LVT)) + LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); + assert(LVT.isInteger()); + + // If the type we've chosen is larger than the largest legal integer type + // then use that instead. + if (VT.bitsGT(LVT)) + VT = LVT; + } + + unsigned NumMemOps = 0; + while (Size != 0) { + unsigned VTSize = VT.getSizeInBits() / 8; + while (VTSize > Size) { + // For now, only use non-vector load / store's for the left-over pieces. + EVT NewVT = VT; + unsigned NewVTSize; + + bool Found = false; + if (VT.isVector() || VT.isFloatingPoint()) { + NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; + if (isOperationLegalOrCustom(ISD::STORE, NewVT) && + isSafeMemOpType(NewVT.getSimpleVT())) + Found = true; + else if (NewVT == MVT::i64 && + isOperationLegalOrCustom(ISD::STORE, MVT::f64) && + isSafeMemOpType(MVT::f64)) { + // i64 is usually not legal on 32-bit targets, but f64 may be. + NewVT = MVT::f64; + Found = true; + } + } + + if (!Found) { + do { + NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); + if (NewVT == MVT::i8) + break; + } while (!isSafeMemOpType(NewVT.getSimpleVT())); + } + NewVTSize = NewVT.getSizeInBits() / 8; + + // If the new VT cannot cover all of the remaining bits, then consider + // issuing a (or a pair of) unaligned and overlapping load / store. + bool Fast; + if (NumMemOps && AllowOverlap && NewVTSize < Size && + allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, + MachineMemOperand::MONone, &Fast) && + Fast) + VTSize = Size; + else { + VT = NewVT; + VTSize = NewVTSize; + } + } + + if (++NumMemOps > Limit) + return false; + + MemOps.push_back(VT); + Size -= VTSize; + } + + return true; +} + /// Soften the operands of a comparison. This code is shared among BR_CC, /// SELECT_CC, and SETCC handlers. void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, @@ -346,7 +452,6 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// return true. 
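The inner loop of findOptimalMemOpLowering above is a greedy cover: use the widest type that still fits, shrink for the tail, and, when overlap is allowed, finish with one wide overlapping access instead of a ladder of ever-narrower ones. The skeleton on plain integers (hypothetical helper; legality, alignment and target hooks omitted; MaxChunk assumed a power of two):

    #include <cstdint>
    #include <vector>

    bool coverMemOp(std::vector<unsigned> &Chunks, unsigned Limit,
                    uint64_t Size, unsigned MaxChunk, bool AllowOverlap) {
      unsigned NumOps = 0;
      unsigned Width = MaxChunk;        // Byte width of the current op type.
      while (Size != 0) {
        unsigned Advance = Width;       // New bytes this operation covers.
        while (Advance > Size) {
          if (NumOps && AllowOverlap) { // Emit one wide op overlapping the
            Advance = Size;             // previous one. (The real code also
            break;                      // asks if misaligned access is fast.)
          }
          Width /= 2;                   // Otherwise shrink the op type.
          Advance = Width;
        }
        if (++NumOps > Limit)
          return false;
        Chunks.push_back(Width);        // Type width used for this op.
        Size -= Advance;
      }
      return true;
    }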
bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const { - SelectionDAG &DAG = TLO.DAG; SDLoc DL(Op); unsigned Opcode = Op.getOpcode(); @@ -372,8 +477,8 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, if (!C.isSubsetOf(Demanded)) { EVT VT = Op.getValueType(); - SDValue NewC = DAG.getConstant(Demanded & C, DL, VT); - SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); + SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT); + SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); return TLO.CombineTo(Op, NewOp); } @@ -487,6 +592,10 @@ bool TargetLowering::SimplifyDemandedBits( // Don't know anything. Known = KnownBits(BitWidth); + // Undef operand. + if (Op.isUndef()) + return false; + if (Op.getOpcode() == ISD::Constant) { // We know all of the bits for a constant! Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); @@ -509,40 +618,116 @@ bool TargetLowering::SimplifyDemandedBits( DemandedElts = APInt::getAllOnesValue(NumElts); } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { // Not demanding any bits/elts from Op. - if (!Op.isUndef()) - return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); - return false; + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); } else if (Depth == 6) { // Limit search depth. return false; } KnownBits Known2, KnownOut; switch (Op.getOpcode()) { + case ISD::SCALAR_TO_VECTOR: { + if (!DemandedElts[0]) + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); + + KnownBits SrcKnown; + SDValue Src = Op.getOperand(0); + unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); + APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth); + if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1)) + return true; + Known = SrcKnown.zextOrTrunc(BitWidth, false); + break; + } case ISD::BUILD_VECTOR: - // Collect the known bits that are shared by every constant vector element. - Known.Zero.setAllBits(); Known.One.setAllBits(); - for (SDValue SrcOp : Op->ops()) { - if (!isa<ConstantSDNode>(SrcOp)) { - // We can only handle all constant values - bail out with no known bits. - Known = KnownBits(BitWidth); - return false; - } - Known2.One = cast<ConstantSDNode>(SrcOp)->getAPIntValue(); - Known2.Zero = ~Known2.One; - - // BUILD_VECTOR can implicitly truncate sources, we must handle this. - if (Known2.One.getBitWidth() != BitWidth) { - assert(Known2.getBitWidth() > BitWidth && - "Expected BUILD_VECTOR implicit truncation"); - Known2 = Known2.trunc(BitWidth); + // Collect the known bits that are shared by every demanded element. + // TODO: Call SimplifyDemandedBits for non-constant demanded elements. + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); + return false; // Don't fall through, will infinitely loop. + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + if (getTargetConstantFromLoad(LD)) { + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); + return false; // Don't fall through, will infinitely loop. + } + break; + } + case ISD::INSERT_VECTOR_ELT: { + SDValue Vec = Op.getOperand(0); + SDValue Scl = Op.getOperand(1); + auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + EVT VecVT = Vec.getValueType(); + + // If index isn't constant, assume we need all vector elements AND the + // inserted element. 
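The INSERT_VECTOR_ELT handling that follows narrows demand in two directions: with a constant lane index, the base vector no longer needs to supply that lane, and if the caller never demanded the lane the whole insert folds to the base vector. In miniature, for an 8-lane vector:

    #include <bitset>
    #include <cstddef>

    // Returns true if the insert can be folded to its base vector because
    // the inserted lane is never read; otherwise clears the lane from the
    // demand passed down to the base vector. An out-of-range (i.e. unknown)
    // index leaves the full demand in place.
    bool foldOrNarrowInsert(std::bitset<8> &DemandedElts, std::size_t Idx) {
      if (Idx >= DemandedElts.size())
        return false;
      if (!DemandedElts[Idx])
        return true;               // Inserted element is never used.
      DemandedElts.reset(Idx);     // Base vector need not supply lane Idx.
      return false;
    }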
+ APInt DemandedVecElts(DemandedElts); + if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) { + unsigned Idx = CIdx->getZExtValue(); + DemandedVecElts.clearBit(Idx); + + // Inserted element is not required. + if (!DemandedElts[Idx]) + return TLO.CombineTo(Op, Vec); + } + + KnownBits KnownScl; + unsigned NumSclBits = Scl.getScalarValueSizeInBits(); + APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits); + if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1)) + return true; + + Known = KnownScl.zextOrTrunc(BitWidth, false); + + KnownBits KnownVec; + if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO, + Depth + 1)) + return true; + + if (!!DemandedVecElts) { + Known.One &= KnownVec.One; + Known.Zero &= KnownVec.Zero; + } + + return false; + } + case ISD::INSERT_SUBVECTOR: { + SDValue Base = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + EVT SubVT = Sub.getValueType(); + unsigned NumSubElts = SubVT.getVectorNumElements(); + + // If index isn't constant, assume we need the original demanded base + // elements and ALL the inserted subvector elements. + APInt BaseElts = DemandedElts; + APInt SubElts = APInt::getAllOnesValue(NumSubElts); + if (isa<ConstantSDNode>(Op.getOperand(2))) { + const APInt &Idx = Op.getConstantOperandAPInt(2); + if (Idx.ule(NumElts - NumSubElts)) { + unsigned SubIdx = Idx.getZExtValue(); + SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); + BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); } + } - // Known bits are the values that are shared by every element. - // TODO: support per-element known bits. - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + KnownBits KnownSub, KnownBase; + if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO, + Depth + 1)) + return true; + if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO, + Depth + 1)) + return true; + + Known.Zero.setAllBits(); + Known.One.setAllBits(); + if (!!SubElts) { + Known.One &= KnownSub.One; + Known.Zero &= KnownSub.Zero; } - return false; // Don't fall through, will infinitely loop. 
+ if (!!BaseElts) { + Known.One &= KnownBase.One; + Known.Zero &= KnownBase.Zero; + } + break; + } case ISD::CONCAT_VECTORS: { Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -640,11 +825,12 @@ bool TargetLowering::SimplifyDemandedBits( } } - if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts, Known2, TLO, - Depth + 1)) + if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts, + Known2, TLO, Depth + 1)) return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); @@ -674,11 +860,12 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); - if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, Known2, TLO, - Depth + 1)) + if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, + Known2, TLO, Depth + 1)) return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); @@ -705,10 +892,12 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); - if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO, + Depth + 1)) return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); @@ -831,20 +1020,23 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); - if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) { + if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { // If the shift count is an invalid immediate, don't do anything. if (SA->getAPIntValue().uge(BitWidth)) break; unsigned ShAmt = SA->getZExtValue(); + if (ShAmt == 0) + return TLO.CombineTo(Op, Op0); // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a // single shift. We can do this if the bottom bits (which are shifted // out) are never demanded. + // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SRL) { - if (ShAmt && - (DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { - if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) { + if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + if (ConstantSDNode *SA2 = + isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { if (SA2->getAPIntValue().ult(BitWidth)) { unsigned C1 = SA2->getZExtValue(); unsigned Opc = ISD::SHL; @@ -862,8 +1054,14 @@ bool TargetLowering::SimplifyDemandedBits( } } - if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, Known, TLO, - Depth + 1)) + if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, + Known, TLO, Depth + 1)) + return true; + + // Try shrinking the operation as long as the shift amount will still be + // in range. 
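The ((X >>u C1) << C2) rewrite in the SHL case above is sound precisely because the demanded mask proves the low C2 result bits dead; what remains is a signed difference of shift amounts. On plain 32-bit values (assuming C1, C2 < 32 and the low C2 bits of the result undemanded):

    #include <cstdint>

    // Single-shift replacement for (X >> C1) << C2: shift by |C2 - C1| in
    // the direction of the larger amount. Bits it smears into positions
    // below C2 differ from the original, but those bits are dead.
    uint32_t foldSrlThenShl(uint32_t X, unsigned C1, unsigned C2) {
      if (C2 >= C1)
        return X << (C2 - C1);
      return X >> (C1 - C2);
    }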
+ if ((ShAmt < DemandedBits.getActiveBits()) && + ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits @@ -919,12 +1117,16 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); - if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) { + if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { // If the shift count is an invalid immediate, don't do anything. if (SA->getAPIntValue().uge(BitWidth)) break; unsigned ShAmt = SA->getZExtValue(); + if (ShAmt == 0) + return TLO.CombineTo(Op, Op0); + + EVT ShiftVT = Op1.getValueType(); APInt InDemandedMask = (DemandedBits << ShAmt); // If the shift is exact, then it does demand the low bits (and knows that @@ -935,10 +1137,11 @@ bool TargetLowering::SimplifyDemandedBits( // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a // single shift. We can do this if the top bits (which are shifted out) // are never demanded. + // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) { - if (ShAmt && - (DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) { + if (ConstantSDNode *SA2 = + isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { + if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) { if (SA2->getAPIntValue().ult(BitWidth)) { unsigned C1 = SA2->getZExtValue(); unsigned Opc = ISD::SRL; @@ -948,7 +1151,7 @@ bool TargetLowering::SimplifyDemandedBits( Opc = ISD::SHL; } - SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType()); + SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT); return TLO.CombineTo( Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); } @@ -957,7 +1160,8 @@ bool TargetLowering::SimplifyDemandedBits( } // Compute the new bits that are at the top now. - if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); @@ -978,12 +1182,15 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isOneValue()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); - if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) { + if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { // If the shift count is an invalid immediate, don't do anything. 
if (SA->getAPIntValue().uge(BitWidth)) break; unsigned ShAmt = SA->getZExtValue(); + if (ShAmt == 0) + return TLO.CombineTo(Op, Op0); + APInt InDemandedMask = (DemandedBits << ShAmt); // If the shift is exact, then it does demand the low bits (and knows that @@ -996,7 +1203,8 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.countLeadingZeros() < ShAmt) InDemandedMask.setSignBit(); - if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); @@ -1026,6 +1234,55 @@ bool TargetLowering::SimplifyDemandedBits( } break; } + case ISD::FSHL: + case ISD::FSHR: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + bool IsFSHL = (Op.getOpcode() == ISD::FSHL); + + if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) { + unsigned Amt = SA->getAPIntValue().urem(BitWidth); + + // For fshl, 0-shift returns the 1st arg. + // For fshr, 0-shift returns the 2nd arg. + if (Amt == 0) { + if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts, + Known, TLO, Depth + 1)) + return true; + break; + } + + // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt)) + // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt) + APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt)); + APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt); + if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO, + Depth + 1)) + return true; + + Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt)); + Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt)); + Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); + Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); + Known.One |= Known2.One; + Known.Zero |= Known2.Zero; + } + break; + } + case ISD::BITREVERSE: { + SDValue Src = Op.getOperand(0); + APInt DemandedSrcBits = DemandedBits.reverseBits(); + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + Known.One = Known2.One.reverseBits(); + Known.Zero = Known2.Zero.reverseBits(); + break; + } case ISD::SIGN_EXTEND_INREG: { SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -1033,8 +1290,8 @@ bool TargetLowering::SimplifyDemandedBits( // If we only care about the highest bit, don't bother shifting right. if (DemandedBits.isSignMask()) { - bool AlreadySignExtended = - TLO.DAG.ComputeNumSignBits(Op0) >= BitWidth - ExVTBits + 1; + unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0); + bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. 
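The new FSHL/FSHR handling relies on the funnel-shift definition: for a nonzero amount Amt already reduced modulo the bit width, fshl(Op0, Op1, Amt) = (Op0 << Amt) | (Op1 >> (BW - Amt)), so a demanded result bit demands bit (i - Amt) of Op0 or bit (i + BW - Amt) of Op1, exactly the Demanded0/Demanded1 masks above. A 32-bit reference model (hypothetical helper, not the ISD expansion):

    #include <cassert>
    #include <cstdint>

    static uint32_t fshl32(uint32_t Op0, uint32_t Op1, unsigned Amt) {
      assert(Amt > 0 && Amt < 32 && "zero amounts are peeled off first");
      return (Op0 << Amt) | (Op1 >> (32 - Amt));
    }

    int main() {
      assert(fshl32(0x12345678, 0x9ABCDEF0, 8) == 0x3456789A);

      // Demanded-bit propagation as in the patch: wanting only the top
      // 16 result bits demands Demanded >> Amt from Op0 and
      // Demanded << (32 - Amt) from Op1 (here: nothing, Op1 is dead).
      uint32_t Demanded = 0xFFFF0000;
      assert((Demanded >> 8) == 0x00FFFF00);
      assert((Demanded << 24) == 0u);
      return 0;
    }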
if (!AlreadySignExtended) { @@ -1099,79 +1356,116 @@ bool TargetLowering::SimplifyDemandedBits( return true; Known.Zero = KnownLo.Zero.zext(BitWidth) | - KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth); + KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth); Known.One = KnownLo.One.zext(BitWidth) | - KnownHi.One.zext(BitWidth).shl(HalfBitWidth); + KnownHi.One.zext(BitWidth).shl(HalfBitWidth); break; } - case ISD::ZERO_EXTEND: { + case ISD::ZERO_EXTEND: + case ISD::ZERO_EXTEND_VECTOR_INREG: { SDValue Src = Op.getOperand(0); - unsigned InBits = Src.getScalarValueSizeInBits(); + EVT SrcVT = Src.getValueType(); + unsigned InBits = SrcVT.getScalarSizeInBits(); + unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG; // If none of the top bits are demanded, convert this into an any_extend. - if (DemandedBits.getActiveBits() <= InBits) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src)); + if (DemandedBits.getActiveBits() <= InBits) { + // If we only need the non-extended bits of the bottom element + // then we can just bitcast to the result. + if (IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits() && + TLO.DAG.getDataLayout().isLittleEndian()) + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); + + unsigned Opc = + IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND; + if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); + } APInt InDemandedBits = DemandedBits.trunc(InBits); - if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1)) + APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - Known = Known.zext(BitWidth); - Known.Zero.setBitsFrom(InBits); + assert(Known.getBitWidth() == InBits && "Src width has changed?"); + Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); break; } - case ISD::SIGN_EXTEND: { + case ISD::SIGN_EXTEND: + case ISD::SIGN_EXTEND_VECTOR_INREG: { SDValue Src = Op.getOperand(0); - unsigned InBits = Src.getScalarValueSizeInBits(); + EVT SrcVT = Src.getValueType(); + unsigned InBits = SrcVT.getScalarSizeInBits(); + unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG; // If none of the top bits are demanded, convert this into an any_extend. - if (DemandedBits.getActiveBits() <= InBits) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src)); + if (DemandedBits.getActiveBits() <= InBits) { + // If we only need the non-extended bits of the bottom element + // then we can just bitcast to the result. + if (IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits() && + TLO.DAG.getDataLayout().isLittleEndian()) + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); + + unsigned Opc = + IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND; + if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); + } + + APInt InDemandedBits = DemandedBits.trunc(InBits); + APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); // Since some of the sign extended bits are demanded, we know that the sign // bit is demanded. 
- APInt InDemandedBits = DemandedBits.trunc(InBits); InDemandedBits.setBit(InBits - 1); - if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(Known.getBitWidth() == InBits && "Src width has changed?"); + // If the sign bit is known one, the top bits match. Known = Known.sext(BitWidth); // If the sign bit is known zero, convert this to a zero extend. - if (Known.isNonNegative()) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src)); + if (Known.isNonNegative()) { + unsigned Opc = + IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND; + if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); + } break; } - case ISD::SIGN_EXTEND_VECTOR_INREG: { - // TODO - merge this with SIGN_EXTEND above? + case ISD::ANY_EXTEND: + case ISD::ANY_EXTEND_VECTOR_INREG: { SDValue Src = Op.getOperand(0); - unsigned InBits = Src.getScalarValueSizeInBits(); - - APInt InDemandedBits = DemandedBits.trunc(InBits); + EVT SrcVT = Src.getValueType(); + unsigned InBits = SrcVT.getScalarSizeInBits(); + unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG; - // If some of the sign extended bits are demanded, we know that the sign - // bit is demanded. - if (InBits < DemandedBits.getActiveBits()) - InDemandedBits.setBit(InBits - 1); + // If we only need the bottom element then we can just bitcast. + // TODO: Handle ANY_EXTEND? + if (IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits() && + TLO.DAG.getDataLayout().isLittleEndian()) + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); - if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1)) - return true; - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - // If the sign bit is known one, the top bits match. - Known = Known.sext(BitWidth); - break; - } - case ISD::ANY_EXTEND: { - SDValue Src = Op.getOperand(0); - unsigned InBits = Src.getScalarValueSizeInBits(); APInt InDemandedBits = DemandedBits.trunc(InBits); - if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1)) + APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - Known = Known.zext(BitWidth); + assert(Known.getBitWidth() == InBits && "Src width has changed?"); + Known = Known.zext(BitWidth, false /* => any extend */); break; } case ISD::TRUNCATE: { @@ -1198,29 +1492,29 @@ bool TargetLowering::SimplifyDemandedBits( // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is // undesirable. 
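All three *_EXTEND_VECTOR_INREG cases now fall back to a plain bitcast when only the bottom result element is demanded and the total width matches, because on a little-endian target the low InBits of result lane 0 occupy exactly the bytes of source lane 0. A host-level illustration (the assert only holds on a little-endian host, which is the guarded condition in the patch):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint16_t Src[4] = {0xBEEF, 0x1111, 0x2222, 0x3333}; // v4i16 source
      uint32_t Dst[2];                                    // v2i32 view
      static_assert(sizeof(Src) == sizeof(Dst), "same total width");
      std::memcpy(Dst, Src, sizeof(Src)); // the "bitcast"
      // The non-extended bits of lane 0 are already in place; only the
      // high half would need zeroing/sign-copying, and it is not demanded.
      assert((Dst[0] & 0xFFFF) == 0xBEEF);
      return 0;
    }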
break; - ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); - if (!ShAmt) + + auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); + if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth)) break; + SDValue Shift = Src.getOperand(1); - if (TLO.LegalTypes()) { - uint64_t ShVal = ShAmt->getZExtValue(); + uint64_t ShVal = ShAmt->getZExtValue(); + + if (TLO.LegalTypes()) Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); - } - if (ShAmt->getZExtValue() < BitWidth) { - APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, - OperandBitWidth - BitWidth); - HighBits.lshrInPlace(ShAmt->getZExtValue()); - HighBits = HighBits.trunc(BitWidth); - - if (!(HighBits & DemandedBits)) { - // None of the shifted in bits are needed. Add a truncate of the - // shift input, then shift it. - SDValue NewTrunc = - TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0)); - return TLO.CombineTo( - Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift)); - } + APInt HighBits = + APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); + HighBits.lshrInPlace(ShVal); + HighBits = HighBits.trunc(BitWidth); + + if (!(HighBits & DemandedBits)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = + TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0)); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift)); } break; } @@ -1234,8 +1528,8 @@ bool TargetLowering::SimplifyDemandedBits( // demanded by its users. EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits()); - if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, - Known, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known, + TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); @@ -1266,7 +1560,7 @@ bool TargetLowering::SimplifyDemandedBits( Known = Known2; if (BitWidth > EltBitWidth) - Known = Known.zext(BitWidth); + Known = Known.zext(BitWidth, false /* => any extend */); break; } case ISD::BITCAST: { @@ -1297,40 +1591,68 @@ bool TargetLowering::SimplifyDemandedBits( TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt)); } } - // If bitcast from a vector, see if we can use SimplifyDemandedVectorElts by - // demanding the element if any bits from it are demanded. + + // Bitcast from a vector using SimplifyDemanded Bits/VectorElts. + // Demand the elt/bit if any of the original elts/bits are demanded. // TODO - bigendian once we have test coverage. // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support. 
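The TRUNCATE(SRL) narrowing in this hunk moves the truncate inside the shift: (i32 trunc (i64 srl X, C)) becomes (i32 srl (i32 trunc X), C) whenever the HighBits mask it builds, the bits shifted in from X's upper word, is disjoint from the demanded bits. With C = 4 those are bits 28..31 of the narrow value:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned C = 4;
      const uint32_t Demanded = 0x0FFFFFFF; // bits 28..31 are dead
      for (uint64_t X : {0x123456789ABCDEF0ull, ~0ull, 0x1ull << 33}) {
        uint32_t Outer = (uint32_t)(X >> C); // trunc(srl X, C)
        uint32_t Inner = (uint32_t)X >> C;   // srl(trunc X, C)
        assert(((Outer ^ Inner) & Demanded) == 0);
      }
      return 0;
    }

The two forms differ only in what lands in the dead high bits, which is exactly what the HighBits & DemandedBits test rules out.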
if (SrcVT.isVector() && NumSrcEltBits > 1 && (BitWidth % NumSrcEltBits) == 0 && TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = BitWidth / NumSrcEltBits; - auto GetDemandedSubMask = [&](APInt &DemandedSubElts) -> bool { - DemandedSubElts = APInt::getNullValue(Scale); - for (unsigned i = 0; i != Scale; ++i) { - unsigned Offset = i * NumSrcEltBits; - APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); - if (!Sub.isNullValue()) - DemandedSubElts.setBit(i); + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != Scale; ++i) { + unsigned Offset = i * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + if (!Sub.isNullValue()) { + DemandedSrcBits |= Sub; + for (unsigned j = 0; j != NumElts; ++j) + if (DemandedElts[j]) + DemandedSrcElts.setBit((j * Scale) + i); } + } + + APInt KnownSrcUndef, KnownSrcZero; + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef, + KnownSrcZero, TLO, Depth + 1)) return true; - }; - APInt DemandedSubElts; - if (GetDemandedSubMask(DemandedSubElts)) { - unsigned NumSrcElts = SrcVT.getVectorNumElements(); - APInt DemandedElts = APInt::getSplat(NumSrcElts, DemandedSubElts); + KnownBits KnownSrcBits; + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, + KnownSrcBits, TLO, Depth + 1)) + return true; + } else if ((NumSrcEltBits % BitWidth) == 0 && + TLO.DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumSrcEltBits / BitWidth; + unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Offset = (i % Scale) * BitWidth; + DemandedSrcBits.insertBits(DemandedBits, Offset); + DemandedSrcElts.setBit(i / Scale); + } - APInt KnownUndef, KnownZero; - if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, - TLO, Depth + 1)) + if (SrcVT.isVector()) { + APInt KnownSrcUndef, KnownSrcZero; + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef, + KnownSrcZero, TLO, Depth + 1)) return true; } + + KnownBits KnownSrcBits; + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, + KnownSrcBits, TLO, Depth + 1)) + return true; } + // If this is a bitcast, let computeKnownBits handle it. Only do this on a // recursive call where Known may be useful to the caller. if (Depth > 0) { - Known = TLO.DAG.computeKnownBits(Op, Depth); + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); return false; } break; @@ -1343,8 +1665,10 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros(); APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); - if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, Depth + 1) || - SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO, Depth + 1) || + if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, + Depth + 1) || + SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO, + Depth + 1) || // See if the operation should be performed at a smaller bit width. 
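The rewritten BITCAST handling maps demanded bits and elements across the cast in both directions. For the narrow-to-wide direction (BitWidth % NumSrcEltBits == 0, little-endian), result lane j covers source lanes j*Scale .. j*Scale+Scale-1, and the sub-chunk of the demanded bits at offset i*NumSrcEltBits selects which of those are live. A uint32_t model of that first branch for v4i16 -> v2i32:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned Scale = 2;           // one i32 lane = two i16 lanes
      uint32_t DemandedElts = 0x2;        // only result lane 1
      uint32_t DemandedBits = 0x0000FF00; // bits 8..15 of each live lane

      uint32_t DemandedSrcElts = 0;
      for (unsigned i = 0; i != Scale; ++i) {
        uint32_t Sub = (DemandedBits >> (i * 16)) & 0xFFFF; // extractBits
        if (Sub)
          for (unsigned j = 0; j != 2; ++j)
            if (DemandedElts & (1u << j))
              DemandedSrcElts |= 1u << (j * Scale + i);
      }
      // Bits 8..15 live in the low i16 half, so only source lane 2 is live.
      assert(DemandedSrcElts == 0x4);
      return 0;
    }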
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) { SDNodeFlags Flags = Op.getNode()->getFlags(); @@ -1353,8 +1677,8 @@ bool TargetLowering::SimplifyDemandedBits( // won't wrap after simplification. Flags.setNoSignedWrap(false); Flags.setNoUnsignedWrap(false); - SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, - Flags); + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); return TLO.CombineTo(Op, NewOp); } return true; @@ -1431,15 +1755,64 @@ bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op, DCI.AddToWorklist(Op.getNode()); DCI.CommitTargetLoweringOpt(TLO); } + return Simplified; } +/// Given a vector binary operation and known undefined elements for each input +/// operand, compute whether each element of the output is undefined. +static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, + const APInt &UndefOp0, + const APInt &UndefOp1) { + EVT VT = BO.getValueType(); + assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() && + "Vector binop only"); + + EVT EltVT = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + assert(UndefOp0.getBitWidth() == NumElts && + UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis"); + + auto getUndefOrConstantElt = [&](SDValue V, unsigned Index, + const APInt &UndefVals) { + if (UndefVals[Index]) + return DAG.getUNDEF(EltVT); + + if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) { + // Try hard to make sure that the getNode() call is not creating temporary + // nodes. Ignore opaque integers because they do not constant fold. + SDValue Elt = BV->getOperand(Index); + auto *C = dyn_cast<ConstantSDNode>(Elt); + if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque())) + return Elt; + } + + return SDValue(); + }; + + APInt KnownUndef = APInt::getNullValue(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + // If both inputs for this element are either constant or undef and match + // the element type, compute the constant/undef result for this element of + // the vector. + // TODO: Ideally we would use FoldConstantArithmetic() here, but that does + // not handle FP constants. The code within getNode() should be refactored + // to avoid the danger of creating a bogus temporary node here. 
+ SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0); + SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1); + if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT) + if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef()) + KnownUndef.setBit(i); + } + return KnownUndef; +} + bool TargetLowering::SimplifyDemandedVectorElts( - SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, + SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth, bool AssumeSingleUse) const { EVT VT = Op.getValueType(); - APInt DemandedElts = DemandedEltMask; + APInt DemandedElts = OriginalDemandedElts; unsigned NumElts = DemandedElts.getBitWidth(); assert(VT.isVector() && "Expected vector op"); assert(VT.getVectorNumElements() == NumElts && @@ -1617,7 +1990,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( SDValue Sub = Op.getOperand(1); EVT SubVT = Sub.getValueType(); unsigned NumSubElts = SubVT.getVectorNumElements(); - const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue(); + const APInt &Idx = Op.getConstantOperandAPInt(2); if (Idx.ugt(NumElts - NumSubElts)) break; unsigned SubIdx = Idx.getZExtValue(); @@ -1786,18 +2159,26 @@ bool TargetLowering::SimplifyDemandedVectorElts( } break; } + case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: { APInt SrcUndef, SrcZero; SDValue Src = Op.getOperand(0); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts); - if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, - SrcZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO, + Depth + 1)) return true; KnownZero = SrcZero.zextOrTrunc(NumElts); KnownUndef = SrcUndef.zextOrTrunc(NumElts); + if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG && + Op.getValueSizeInBits() == Src.getValueSizeInBits() && + DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) { + // aext - if we just need the bottom element then we can bitcast. + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); + } + if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) { // zext(undef) upper bits are guaranteed to be zero. if (DemandedElts.isSubsetOf(KnownUndef)) @@ -1806,6 +2187,9 @@ bool TargetLowering::SimplifyDemandedVectorElts( } break; } + + // TODO: There are more binop opcodes that could be handled here - MUL, MIN, + // MAX, saturated math, etc. 
case ISD::OR: case ISD::XOR: case ISD::ADD: @@ -1815,17 +2199,38 @@ bool TargetLowering::SimplifyDemandedVectorElts( case ISD::FMUL: case ISD::FDIV: case ISD::FREM: { - APInt SrcUndef, SrcZero; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, - SrcZero, TLO, Depth + 1)) + APInt UndefRHS, ZeroRHS; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS, + ZeroRHS, TLO, Depth + 1)) return true; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, - KnownZero, TLO, Depth + 1)) + APInt UndefLHS, ZeroLHS; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS, + ZeroLHS, TLO, Depth + 1)) return true; - KnownZero &= SrcZero; - KnownUndef &= SrcUndef; + + KnownZero = ZeroLHS & ZeroRHS; + KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS); + break; + } + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::ROTL: + case ISD::ROTR: { + APInt UndefRHS, ZeroRHS; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS, + ZeroRHS, TLO, Depth + 1)) + return true; + APInt UndefLHS, ZeroLHS; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS, + ZeroLHS, TLO, Depth + 1)) + return true; + + KnownZero = ZeroLHS; + KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop? break; } + case ISD::MUL: case ISD::AND: { APInt SrcUndef, SrcZero; if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, @@ -1837,6 +2242,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( // If either side has a zero element, then the result element is zero, even // if the other is an UNDEF. + // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros + // and then handle 'and' nodes with the rest of the binop opcodes. KnownZero |= SrcZero; KnownUndef &= SrcUndef; KnownUndef &= ~KnownZero; @@ -1864,8 +2271,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( } else { KnownBits Known; APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits); - if (SimplifyDemandedBits(Op, DemandedBits, DemandedEltMask, Known, TLO, - Depth, AssumeSingleUse)) + if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known, + TLO, Depth, AssumeSingleUse)) return true; } break; @@ -1950,6 +2357,10 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode( return false; } +const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const { + return nullptr; +} + bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN, @@ -2044,10 +2455,9 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, /// This helper function of SimplifySetCC tries to optimize the comparison when /// either operand of the SetCC node is a bitwise-and instruction. -SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1, - ISD::CondCode Cond, - DAGCombinerInfo &DCI, - const SDLoc &DL) const { +SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &DL, + DAGCombinerInfo &DCI) const { // Match these patterns in any of their permutations: // (X & Y) == Y // (X & Y) != Y @@ -2200,6 +2610,49 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( return T2; } +/// Try to fold an equality comparison with a {add/sub/xor} binary operation as +/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to +/// handle the commuted versions of these patterns. 
+SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &DL, + DAGCombinerInfo &DCI) const { + unsigned BOpcode = N0.getOpcode(); + assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) && + "Unexpected binop"); + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode"); + + // (X + Y) == X --> Y == 0 + // (X - Y) == X --> Y == 0 + // (X ^ Y) == X --> Y == 0 + SelectionDAG &DAG = DCI.DAG; + EVT OpVT = N0.getValueType(); + SDValue X = N0.getOperand(0); + SDValue Y = N0.getOperand(1); + if (X == N1) + return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond); + + if (Y != N1) + return SDValue(); + + // (X + Y) == Y --> X == 0 + // (X ^ Y) == Y --> X == 0 + if (BOpcode == ISD::ADD || BOpcode == ISD::XOR) + return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond); + + // The shift would not be valid if the operands are boolean (i1). + if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1) + return SDValue(); + + // (X - Y) == Y --> X == Y << 1 + EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(), + !DCI.isBeforeLegalize()); + SDValue One = DAG.getConstant(1, DL, ShiftVT); + SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(YShl1.getNode()); + return DAG.getSetCC(DL, VT, X, YShl1, Cond); +} + /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue. SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -2209,14 +2662,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SelectionDAG &DAG = DCI.DAG; EVT OpVT = N0.getValueType(); - // These setcc operations always fold. - switch (Cond) { - default: break; - case ISD::SETFALSE: - case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT); - case ISD::SETTRUE: - case ISD::SETTRUE2: return DAG.getBoolConstant(true, dl, VT, OpVT); - } + // Constant fold or commute setcc. + if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl)) + return Fold; // Ensure that the constant occurs on the RHS and fold constant comparisons. // TODO: Handle non-splat vector constants. All undef causes trouble. @@ -2226,6 +2674,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); + // If we have a subtract with the same 2 non-constant operands as this setcc + // -- but in reverse order -- then try to commute the operands of this setcc + // to match. A matching pair of setcc (cmp) and sub may be combined into 1 + // instruction on some targets. 
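foldSetCCWithBinOp generalizes the old ad-hoc (X+Z)==X code into one helper covering ADD, SUB and XOR. The identities hold in any two's-complement width because all three operations are invertible in one operand; only the (X - Y) == Y case needs the Y << 1 rewrite, which the patch guards against i1 operands and extra uses. A brute-force scalar check:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X != 256; ++X)
        for (uint32_t Y = 0; Y != 256; ++Y) {
          assert(((X + Y) == X) == (Y == 0));
          assert(((X - Y) == X) == (Y == 0));
          assert(((X ^ Y) == X) == (Y == 0));
          assert(((X + Y) == Y) == (X == 0));
          assert(((X ^ Y) == Y) == (X == 0));
          assert(((X - Y) == Y) == (X == (Y << 1))); // wraps like ISD::SHL
        }
      return 0;
    }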
+ if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) && + (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) && + DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) && + !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } )) + return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -2235,8 +2694,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && N0.getOperand(0).getOpcode() == ISD::CTLZ && N0.getOperand(1).getOpcode() == ISD::Constant) { - const APInt &ShAmt - = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + const APInt &ShAmt = N0.getConstantOperandAPInt(1); if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && ShAmt == Log2_32(N0.getValueSizeInBits())) { if ((C1 == 0) == (Cond == ISD::SETEQ)) { @@ -2275,7 +2733,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); } - // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. + // If ctpop is not supported, expand a power-of-2 comparison based on it. + if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) + // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) + SDValue Zero = DAG.getConstant(0, dl, CTVT); + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true); + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); + SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); + SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond); + unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR; + return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS); + } } // (zext x) == C --> x == (trunc C) @@ -2387,8 +2859,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // 8 bits, but have to be careful... 
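The ctpop expansion added above tests power-of-two-ness without a population count: x & (x - 1) clears the lowest set bit, so it is zero exactly when x has at most one bit set, and the x != 0 leg excludes zero. Exhaustively for 16-bit values:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X <= 0xFFFF; ++X) {
        unsigned Pop = 0;
        for (uint32_t T = X; T; T &= T - 1)
          ++Pop;
        bool Expanded = (X != 0) && ((X & (X - 1)) == 0);
        assert(Expanded == (Pop == 1)); // the SETEQ form; SETNE is negated
      }
      return 0;
    }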
if (Lod->getExtensionType() != ISD::NON_EXTLOAD) origWidth = Lod->getMemoryVT().getSizeInBits(); - const APInt &Mask = - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + const APInt &Mask = N0.getConstantOperandAPInt(1); for (unsigned width = origWidth / 2; width>=8; width /= 2) { APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offset<origWidth/width; offset++) { @@ -2480,7 +2951,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, break; } default: - break; // todo, be more careful with signed comparisons + break; // todo, be more careful with signed comparisons } } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { @@ -2501,7 +2972,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } else { APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), - DAG.getConstant(Imm, dl, Op0Ty)); + DAG.getConstant(Imm, dl, Op0Ty)); } if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(ZextOp.getNode()); @@ -2598,6 +3069,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + // Given: + // icmp eq/ne (urem %x, %y), 0 + // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': + // icmp eq/ne %x, 0 + if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0)); + KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1)); + if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond); + } + if (SDValue V = optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) return V; @@ -2805,25 +3288,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } - if (isa<ConstantFPSDNode>(N0.getNode())) { - // Constant fold or commute setcc. - SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl); - if (O.getNode()) return O; - } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) { - // If the RHS of an FP comparison is a constant, simplify it away in - // some cases. - if (CFP->getValueAPF().isNaN()) { - // If an operand is known to be a nan, we can fold it. - switch (ISD::getUnorderedFlavor(Cond)) { - default: llvm_unreachable("Unknown flavor!"); - case 0: // Known false. - return DAG.getBoolConstant(false, dl, VT, OpVT); - case 1: // Known true. - return DAG.getBoolConstant(true, dl, VT, OpVT); - case 2: // Undefined. - return DAG.getUNDEF(VT); - } - } + if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) { + auto *CFP = cast<ConstantFPSDNode>(N1); + assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value"); // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the // constant if knowing that the operand is non-nan is enough. We prefer to @@ -2883,15 +3350,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. + assert(!N0.getValueType().isInteger() && + "Integer types should be handled by FoldSetCC"); bool EqTrue = ISD::isTrueWhenEqual(Cond); - - // We can always fold X == X for integer setcc's. - if (N0.getValueType().isInteger()) - return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); - unsigned UOF = ISD::getUnorderedFlavor(Cond); - if (UOF == 2) // FP operators that are undefined on NaNs. 
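The urem fold a few hunks up ((urem %x, %y) == 0 with countMaxPopulation(x) <= 1 and countMinPopulation(y) >= 2) is sound because the only divisors of a power of two are themselves powers of two, which carry exactly one bit; a %y that always has at least two bits set can therefore never divide a nonzero %x. A spot check:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned K = 0; K != 16; ++K) {
        uint32_t X = 1u << K;                // the single-bit values of %x
        for (uint32_t Y = 3; Y != 1024; ++Y) {
          unsigned Pop = 0;
          for (uint32_t T = Y; T; T &= T - 1)
            ++Pop;
          if (Pop >= 2)
            assert(X % Y != 0);              // urem is never zero here
        }
      }
      assert(0u % 7u == 0); // the only other possible %x, where urem is zero
      return 0;
    }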
+ if (UOF == 2) // FP operators that are undefined on NaNs. return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); if (UOF == unsigned(EqTrue)) return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); @@ -2900,7 +3364,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || - isCondCodeLegal(NewCond, N0.getSimpleValueType()))) + isCondCodeLegal(NewCond, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N0, N1, NewCond); } @@ -2969,69 +3433,39 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue()); } - // Simplify (X+Z) == X --> Z == 0 + // (X+Y) == X --> Y == 0 and similar folds. // Don't do this if X is an immediate that can fold into a cmp - // instruction and X+Z has other uses. It could be an induction variable + // instruction and X+Y has other uses. It could be an induction variable // chain, and the transform would increase register pressure. - if (!LegalRHSImm || N0.getNode()->hasOneUse()) { - if (N0.getOperand(0) == N1) - return DAG.getSetCC(dl, VT, N0.getOperand(1), - DAG.getConstant(0, dl, N0.getValueType()), Cond); - if (N0.getOperand(1) == N1) { - if (isCommutativeBinOp(N0.getOpcode())) - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(0, dl, N0.getValueType()), - Cond); - if (N0.getNode()->hasOneUse()) { - assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); - auto &DL = DAG.getDataLayout(); - // (Z-X) == X --> Z == X<<1 - SDValue SH = DAG.getNode( - ISD::SHL, dl, N1.getValueType(), N1, - DAG.getConstant(1, dl, - getShiftAmountTy(N1.getValueType(), DL, - !DCI.isBeforeLegalize()))); - if (!DCI.isCalledByLegalizer()) - DCI.AddToWorklist(SH.getNode()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); - } - } - } + if (!LegalRHSImm || N0.hasOneUse()) + if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI)) + return V; } if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || - N1.getOpcode() == ISD::XOR) { - // Simplify X == (X+Z) --> Z == 0 - if (N1.getOperand(0) == N0) - return DAG.getSetCC(dl, VT, N1.getOperand(1), - DAG.getConstant(0, dl, N1.getValueType()), Cond); - if (N1.getOperand(1) == N0) { - if (isCommutativeBinOp(N1.getOpcode())) - return DAG.getSetCC(dl, VT, N1.getOperand(0), - DAG.getConstant(0, dl, N1.getValueType()), Cond); - if (N1.getNode()->hasOneUse()) { - assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); - auto &DL = DAG.getDataLayout(); - // X == (Z-X) --> X<<1 == Z - SDValue SH = DAG.getNode( - ISD::SHL, dl, N1.getValueType(), N0, - DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()))); - if (!DCI.isCalledByLegalizer()) - DCI.AddToWorklist(SH.getNode()); - return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); - } - } - } + N1.getOpcode() == ISD::XOR) + if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI)) + return V; - if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl)) + if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI)) return V; } + // Fold remainder of division by a constant. + if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + + // When division is cheap or optimizing for minimum size, + // fall through to DIVREM creation by skipping this fold. 
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) + if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } + // Fold away ALL boolean setcc's. - SDValue Temp; if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) { - EVT OpVT = N0.getValueType(); + SDValue Temp; switch (Cond) { default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETEQ: // X == Y -> ~(X^Y) @@ -3134,18 +3568,18 @@ TargetLowering::getConstraintType(StringRef Constraint) const { switch (Constraint[0]) { default: break; case 'r': return C_RegisterClass; - case 'm': // memory - case 'o': // offsetable - case 'V': // not offsetable + case 'm': // memory + case 'o': // offsetable + case 'V': // not offsetable return C_Memory; - case 'i': // Simple Integer or Relocatable Constant - case 'n': // Simple Integer - case 'E': // Floating Point Constant - case 'F': // Floating Point Constant - case 's': // Relocatable Constant - case 'p': // Address. - case 'X': // Allow ANY value. - case 'I': // Target registers. + case 'i': // Simple Integer or Relocatable Constant + case 'n': // Simple Integer + case 'E': // Floating Point Constant + case 'F': // Floating Point Constant + case 's': // Relocatable Constant + case 'p': // Address. + case 'X': // Allow ANY value. + case 'I': // Target registers. case 'J': case 'K': case 'L': @@ -3159,7 +3593,7 @@ TargetLowering::getConstraintType(StringRef Constraint) const { } } - if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') { + if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') { if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}" return C_Memory; return C_Register; @@ -3170,14 +3604,20 @@ TargetLowering::getConstraintType(StringRef Constraint) const { /// Try to replace an X constraint, which matches anything, with another that /// has more specific requirements based on the type of the corresponding /// operand. -const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ +const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const { if (ConstraintVT.isInteger()) return "r"; if (ConstraintVT.isFloatingPoint()) - return "f"; // works for many targets + return "f"; // works for many targets return nullptr; } +SDValue TargetLowering::LowerAsmOutputForConstraint( + SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo, + SelectionDAG &DAG) const { + return SDValue(); +} + /// Lower the specified operand into the Ops vector. /// If it is invalid, don't add anything to Ops. void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, @@ -3191,7 +3631,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, switch (ConstraintLetter) { default: break; case 'X': // Allows any operand; labels (basic block) use this. - if (Op.getOpcode() == ISD::BasicBlock) { + if (Op.getOpcode() == ISD::BasicBlock || + Op.getOpcode() == ISD::TargetBlockAddress) { Ops.push_back(Op); return; } @@ -3199,46 +3640,57 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer case 's': { // Relocatable Constant - // These operands are interested in values of the form (GV+C), where C may - // be folded in as an offset of GV, or it may be explicitly added. Also, it - // is possible and fine if either GV or C are missing. 
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); - GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); - - // If we have "(add GV, C)", pull out GV/C - if (Op.getOpcode() == ISD::ADD) { - C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); - GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); - if (!C || !GA) { - C = dyn_cast<ConstantSDNode>(Op.getOperand(0)); - GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1)); - } - if (!C || !GA) { - C = nullptr; - GA = nullptr; - } - } - // If we find a valid operand, map to the TargetXXX version so that the - // value itself doesn't get selected. - if (GA) { // Either &GV or &GV+C - if (ConstraintLetter != 'n') { - int64_t Offs = GA->getOffset(); - if (C) Offs += C->getZExtValue(); - Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), - C ? SDLoc(C) : SDLoc(), - Op.getValueType(), Offs)); - } - return; - } - if (C) { // just C, no GV. - // Simple constants are not allowed for 's'. - if (ConstraintLetter != 's') { + GlobalAddressSDNode *GA; + ConstantSDNode *C; + BlockAddressSDNode *BA; + uint64_t Offset = 0; + + // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C), + // etc., since getelementpointer is variadic. We can't use + // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible + // while in this case the GA may be furthest from the root node which is + // likely an ISD::ADD. + while (1) { + if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') { + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), + GA->getValueType(0), + Offset + GA->getOffset())); + return; + } else if ((C = dyn_cast<ConstantSDNode>(Op)) && + ConstraintLetter != 's') { // gcc prints these as sign extended. Sign extend value to 64 bits // now; without this it would get ZExt'd later in // ScheduleDAGSDNodes::EmitNode, which is very generic. - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + bool IsBool = C->getConstantIntValue()->getBitWidth() == 1; + BooleanContent BCont = getBooleanContents(MVT::i64); + ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont) + : ISD::SIGN_EXTEND; + int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() + : C->getSExtValue(); + Ops.push_back(DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64)); + return; + } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) && + ConstraintLetter != 'n') { + Ops.push_back(DAG.getTargetBlockAddress( + BA->getBlockAddress(), BA->getValueType(0), + Offset + BA->getOffset(), BA->getTargetFlags())); + return; + } else { + const unsigned OpCode = Op.getOpcode(); + if (OpCode == ISD::ADD || OpCode == ISD::SUB) { + if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0)))) + Op = Op.getOperand(1); + // Subtraction is not commutative. + else if (OpCode == ISD::ADD && + (C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))) + Op = Op.getOperand(0); + else + return; + Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue(); + continue; + } } return; } @@ -3252,14 +3704,14 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, StringRef Constraint, MVT VT) const { if (Constraint.empty() || Constraint[0] != '{') - return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr)); - assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); + return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr)); + assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?"); // Remove the braces from around the name. 
- StringRef RegName(Constraint.data()+1, Constraint.size()-2); + StringRef RegName(Constraint.data() + 1, Constraint.size() - 2); - std::pair<unsigned, const TargetRegisterClass*> R = - std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr)); + std::pair<unsigned, const TargetRegisterClass *> R = + std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr)); // Figure out which register class contains this reg. for (const TargetRegisterClass *RC : RI->regclasses()) { @@ -3271,8 +3723,8 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { if (RegName.equals_lower(RI->getRegAsmName(*I))) { - std::pair<unsigned, const TargetRegisterClass*> S = - std::make_pair(*I, RC); + std::pair<unsigned, const TargetRegisterClass *> S = + std::make_pair(*I, RC); // If this register class has the requested value type, return it, // otherwise keep searching and return the first class found @@ -3321,8 +3773,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, // Do a prepass over the constraints, canonicalizing them, and building up the // ConstraintOperands list. - unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. - unsigned ResNo = 0; // ResNo - The result number of the next output. + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { ConstraintOperands.emplace_back(std::move(CI)); @@ -3391,7 +3843,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, case 64: case 128: OpInfo.ConstraintVT = - MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true); + MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true); break; } } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { @@ -3416,8 +3868,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, for (maIndex = 0; maIndex < maCount; ++maIndex) { int weightSum = 0; for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); - cIndex != eIndex; ++cIndex) { - AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + cIndex != eIndex; ++cIndex) { + AsmOperandInfo &OpInfo = ConstraintOperands[cIndex]; if (OpInfo.Type == InlineAsm::isClobber) continue; @@ -3432,7 +3884,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, Input.ConstraintVT.isInteger()) || (OpInfo.ConstraintVT.getSizeInBits() != Input.ConstraintVT.getSizeInBits())) { - weightSum = -1; // Can't match. + weightSum = -1; // Can't match. break; } } @@ -3453,8 +3905,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, // Now select chosen alternative in each constraint. for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); - cIndex != eIndex; ++cIndex) { - AsmOperandInfo& cInfo = ConstraintOperands[cIndex]; + cIndex != eIndex; ++cIndex) { + AsmOperandInfo &cInfo = ConstraintOperands[cIndex]; if (cInfo.Type == InlineAsm::isClobber) continue; cInfo.selectAlternative(bestMAIndex); @@ -3464,8 +3916,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, // Check and hook up tied operands, choose constraint code to use. for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); - cIndex != eIndex; ++cIndex) { - AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + cIndex != eIndex; ++cIndex) { + AsmOperandInfo &OpInfo = ConstraintOperands[cIndex]; // If this is an output operand with a matching input operand, look up the // matching input. 
If their types mismatch, e.g. one is an integer, the @@ -3577,9 +4029,9 @@ TargetLowering::ConstraintWeight weight = CW_Register; break; case 'X': // any operand. - default: - weight = CW_Default; - break; + default: + weight = CW_Default; + break; } return weight; } @@ -3678,6 +4130,9 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, return; } + if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress) + return; + // Otherwise, try to resolve it to something we know about by looking at // the actual operand type. if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) { @@ -3749,12 +4204,12 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, } SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, - SelectionDAG &DAG, - SmallVectorImpl<SDNode *> &Created) const { + SelectionDAG &DAG, + SmallVectorImpl<SDNode *> &Created) const { AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.isIntDivCheap(N->getValueType(0), Attr)) - return SDValue(N,0); // Lower SDIV as SDIV + return SDValue(N, 0); // Lower SDIV as SDIV return SDValue(); } @@ -4000,6 +4455,104 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return DAG.getSelect(dl, VT, IsOne, N0, Q); } +/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE +/// where the divisor is constant and the comparison target is zero, +/// return a DAG expression that will generate the same comparison result +/// using only multiplications, additions and shifts/rotations. +/// Ref: "Hacker's Delight" 10-17. +SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, + ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const { + SmallVector<SDNode *, 2> Built; + if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, + DCI, DL, Built)) { + for (SDNode *N : Built) + DCI.AddToWorklist(N); + return Folded; + } + + return SDValue(); +} + +SDValue +TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL, + SmallVectorImpl<SDNode *> &Created) const { + // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) + // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1 + // - P is the multiplicative inverse of D0 modulo 2^W + // - Q = floor((2^W - 1) / D0) + // where W is the width of the common type of N and D. + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Only applicable for (in)equality comparisons."); + + EVT VT = REMNode.getValueType(); + + // If MUL is unavailable, we cannot proceed in any case. + if (!isOperationLegalOrCustom(ISD::MUL, VT)) + return SDValue(); + + // TODO: Add non-uniform constant support. + ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1)); + ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); + if (!Divisor || !CompTarget || Divisor->isNullValue() || + !CompTarget->isNullValue()) + return SDValue(); + + const APInt &D = Divisor->getAPIntValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + bool DivisorIsEven = (K != 0); + APInt D0 = D.lshr(K); + + // The fold is invalid when D0 == 1. + // This is reachable because visitSetCC happens before visitREM. + if (D0.isOneValue()) + return SDValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate. 
+ unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // Q = floor((2^W - 1) / D) + APInt Q = APInt::getAllOnesValue(W).udiv(D); + + SelectionDAG &DAG = DCI.DAG; + + SDValue PVal = DAG.getConstant(P, DL, VT); + SDValue QVal = DAG.getConstant(Q, DL, VT); + // (mul N, P) + SDValue Op1 = DAG.getNode(ISD::MUL, DL, VT, REMNode->getOperand(0), PVal); + Created.push_back(Op1.getNode()); + + // Rotate right only if D was even. + if (DivisorIsEven) { + // We need ROTR to do this. + if (!isOperationLegalOrCustom(ISD::ROTR, VT)) + return SDValue(); + SDValue ShAmt = + DAG.getConstant(K, DL, getShiftAmountTy(VT, DAG.getDataLayout())); + SDNodeFlags Flags; + Flags.setExact(true); + // UREM: (rotr (mul N, P), K) + Op1 = DAG.getNode(ISD::ROTR, DL, VT, Op1, ShAmt, Flags); + Created.push_back(Op1.getNode()); + } + + // UREM: (setule/setugt (rotr (mul N, P), K), Q) + return DAG.getSetCC(DL, SETCCVT, Op1, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); +} + bool TargetLowering:: verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { if (!isa<ConstantSDNode>(Op.getOperand(0))) { @@ -4308,7 +4861,7 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, } bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { SDValue Src = Node->getOperand(0); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -4320,7 +4873,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, // Expand f32 -> i64 conversion // This algorithm comes from compiler-rt's implementation of fixsfdi: - // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c + // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c unsigned SrcEltBits = SrcVT.getScalarSizeInBits(); EVT IntVT = SrcVT.changeTypeToInteger(); EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout()); @@ -4544,6 +5097,17 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags()); } + // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that + // instead if there are no NaNs. + if (Node->getFlags().hasNoNaNs()) { + unsigned IEEE2018Op = + Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM; + if (isOperationLegalOrCustom(IEEE2018Op, VT)) { + return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0), + Node->getOperand(1), Node->getFlags()); + } + } + return SDValue(); } @@ -4771,7 +5335,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains); SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals); - return DAG.getMergeValues({ Value, NewChain }, SL); + return DAG.getMergeValues({Value, NewChain}, SL); } SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, @@ -4826,7 +5390,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, // Store Stride in bytes unsigned Stride = MemSclVT.getSizeInBits() / 8; - assert (Stride && "Zero stride!"); + assert(Stride && "Zero stride!"); // Extract each of the elements from the original vector and save them into // memory individually. 
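prepareUREMEqFold is plain modular arithmetic (Hacker's Delight 10-17), so it can be validated outside the DAG. One nit worth flagging: the function-level comment says Q = floor((2^W - 1) / D0), but the code, like the reference, divides by the full D. A 32-bit model with D = 12 = 3 * 2^2, using Newton/Hensel lifting for the inverse (each step doubles the number of correct low bits):

    #include <cassert>
    #include <cstdint>

    static uint32_t rotr32(uint32_t V, unsigned K) {
      return K ? (V >> K) | (V << (32 - K)) : V;
    }

    static uint32_t modInverse32(uint32_t A) { // A must be odd
      uint32_t X = A; // correct mod 8 already, since A*A == 1 (mod 8)
      for (int i = 0; i != 5; ++i)
        X *= 2 - A * X;
      return X;
    }

    int main() {
      const uint32_t D = 12, D0 = 3;
      const unsigned K = 2;
      const uint32_t P = modInverse32(D0); // 0xAAAAAAAB
      const uint32_t Q = 0xFFFFFFFFu / D;  // floor((2^32 - 1) / D)
      assert(D0 * P == 1);

      for (uint32_t N = 0; N != 100000; ++N)
        assert((N % D == 0) == (rotr32(N * P, K) <= Q));
      return 0;
    }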
SmallVector<SDValue, 8> Stores; @@ -5013,17 +5577,16 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); auto &MF = DAG.getMachineFunction(); - EVT MemVT = ST->getMemoryVT(); + EVT StoreMemVT = ST->getMemoryVT(); SDLoc dl(ST); - if (MemVT.isFloatingPoint() || MemVT.isVector()) { + if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); if (isTypeLegal(intVT)) { if (!isOperationLegalOrCustom(ISD::STORE, intVT) && - MemVT.isVector()) { + StoreMemVT.isVector()) { // Scalarize the store and let the individual components be handled. SDValue Result = scalarizeVectorStore(ST, DAG); - return Result; } // Expand to a bitconvert of the value to the integer type of the @@ -5036,24 +5599,22 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, } // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. - EVT StoredVT = ST->getMemoryVT(); - MVT RegVT = - getRegisterType(*DAG.getContext(), - EVT::getIntegerVT(*DAG.getContext(), - StoredVT.getSizeInBits())); + MVT RegVT = getRegisterType( + *DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits())); EVT PtrVT = Ptr.getValueType(); - unsigned StoredBytes = StoredVT.getStoreSize(); + unsigned StoredBytes = StoreMemVT.getStoreSize(); unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; // Make sure the stack slot is also aligned for the register type. - SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); + SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT); auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); // Perform the original store, only redirected to the stack slot. SDValue Store = DAG.getTruncStore( Chain, dl, Val, StackPtr, - MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT); + MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT); EVT StackPtrVT = StackPtr.getValueType(); @@ -5082,17 +5643,17 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, // The last store may be partial. Do a truncating store. On big-endian // machines this requires an extending load from the stack slot to ensure // that the bits are in the right place. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), - 8 * (StoredBytes - Offset)); + EVT LoadMemVT = + EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset)); // Load from the stack slot. SDValue Load = DAG.getExtLoad( ISD::EXTLOAD, dl, RegVT, Store, StackPtr, - MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT); + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT); Stores.push_back( DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, - ST->getPointerInfo().getWithOffset(Offset), MemVT, + ST->getPointerInfo().getWithOffset(Offset), LoadMemVT, MinAlign(ST->getAlignment(), Offset), ST->getMemOperand()->getFlags(), ST->getAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. 
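The hunk that follows handles the final integer case of expandUnalignedStore by splitting the value into two half-width stores, with the high half produced by an SRL and the target's endianness deciding which half goes to the lower address. A host-side sketch of the little-endian ordering (memcpy stands in for the two narrower stores):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static void storeUnaligned32(uint8_t *P, uint32_t V) {
      uint16_t Lo = (uint16_t)V;         // low half
      uint16_t Hi = (uint16_t)(V >> 16); // high half, the ISD::SRL
      std::memcpy(P, &Lo, 2);            // store at Ptr
      std::memcpy(P + 2, &Hi, 2);        // store at Ptr + IncrementSize
    }

    int main() {
      uint8_t Buf[8] = {};
      storeUnaligned32(Buf + 1, 0xDEADBEEF); // deliberately misaligned
      uint32_t Out;
      std::memcpy(&Out, Buf + 1, 4);
      assert(Out == 0xDEADBEEF); // holds on a little-endian host
      return 0;
    }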
@@ -5100,18 +5661,16 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, return Result; } - assert(ST->getMemoryVT().isInteger() && - !ST->getMemoryVT().isVector() && + assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() && "Unaligned store of unknown type."); // Get the half-size VT - EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext()); + EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext()); int NumBits = NewStoredVT.getSizeInBits(); int IncrementSize = NumBits / 8; // Divide the stored value in two parts. - SDValue ShiftAmount = - DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(), - DAG.getDataLayout())); + SDValue ShiftAmount = DAG.getConstant( + NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout())); SDValue Lo = Val; SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); @@ -5130,7 +5689,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, ST->getMemOperand()->getFlags(), ST->getAAInfo()); SDValue Result = - DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); return Result; } @@ -5242,7 +5801,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. // At last for X86 targets, maybe good for other targets too? MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); - MFI.setAdjustsStack(true); // Is this only for X86 target? + MFI.setAdjustsStack(true); // Is this only for X86 target? MFI.setHasCalls(true); assert((GA->getOffset() == 0) && @@ -5282,15 +5841,19 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { EVT VT = LHS.getValueType(); SDLoc dl(Node); + assert(VT == RHS.getValueType() && "Expected operands to be the same type"); + assert(VT.isInteger() && "Expected operands to be integers"); + // usub.sat(a, b) -> umax(a, b) - b if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) { SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS); return DAG.getNode(ISD::SUB, dl, VT, Max, RHS); } - if (VT.isVector()) { - // TODO: Consider not scalarizing here. - return SDValue(); + if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) { + SDValue InvRHS = DAG.getNOT(dl, RHS, VT); + SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS); + return DAG.getNode(ISD::ADD, dl, VT, Min, RHS); } unsigned OverflowOp; @@ -5312,96 +5875,410 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { "addition or subtraction node."); } - assert(LHS.getValueType().isScalarInteger() && - "Expected operands to be integers. Vector of int arguments should " - "already be unrolled."); - assert(RHS.getValueType().isScalarInteger() && - "Expected operands to be integers. 
Vector of int arguments should " - "already be unrolled."); - assert(LHS.getValueType() == RHS.getValueType() && - "Expected both operands to be the same type"); - - unsigned BitWidth = LHS.getValueSizeInBits(); - EVT ResultType = LHS.getValueType(); - EVT BoolVT = - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResultType); - SDValue Result = - DAG.getNode(OverflowOp, dl, DAG.getVTList(ResultType, BoolVT), LHS, RHS); + unsigned BitWidth = LHS.getScalarValueSizeInBits(); + EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), + LHS, RHS); SDValue SumDiff = Result.getValue(0); SDValue Overflow = Result.getValue(1); - SDValue Zero = DAG.getConstant(0, dl, ResultType); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue AllOnes = DAG.getAllOnesConstant(dl, VT); if (Opcode == ISD::UADDSAT) { - // Just need to check overflow for SatMax. - APInt MaxVal = APInt::getMaxValue(BitWidth); - SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType); - return DAG.getSelect(dl, ResultType, Overflow, SatMax, SumDiff); + if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) { + // (LHS + RHS) | OverflowMask + SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT); + return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask); + } + // Overflow ? 0xffff.... : (LHS + RHS) + return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff); } else if (Opcode == ISD::USUBSAT) { - // Just need to check overflow for SatMin. - APInt MinVal = APInt::getMinValue(BitWidth); - SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType); - return DAG.getSelect(dl, ResultType, Overflow, SatMin, SumDiff); + if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) { + // (LHS - RHS) & ~OverflowMask + SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT); + SDValue Not = DAG.getNOT(dl, OverflowMask, VT); + return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not); + } + // Overflow ? 0 : (LHS - RHS) + return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff); } else { // SatMax -> Overflow && SumDiff < 0 // SatMin -> Overflow && SumDiff >= 0 APInt MinVal = APInt::getSignedMinValue(BitWidth); APInt MaxVal = APInt::getSignedMaxValue(BitWidth); - SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType); - SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType); + SDValue SatMin = DAG.getConstant(MinVal, dl, VT); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, ResultType, SumNeg, SatMax, SatMin); - return DAG.getSelect(dl, ResultType, Overflow, Result, SumDiff); + Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin); + return DAG.getSelect(dl, VT, Overflow, Result, SumDiff); } } SDValue -TargetLowering::getExpandedFixedPointMultiplication(SDNode *Node, - SelectionDAG &DAG) const { - assert(Node->getOpcode() == ISD::SMULFIX && "Expected opcode to be SMULFIX."); - assert(Node->getNumOperands() == 3 && - "Expected signed fixed point multiplication to have 3 operands."); +TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { + assert((Node->getOpcode() == ISD::SMULFIX || + Node->getOpcode() == ISD::UMULFIX || + Node->getOpcode() == ISD::SMULFIXSAT) && + "Expected a fixed point multiplication opcode"); SDLoc dl(Node); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); - assert(LHS.getValueType().isScalarInteger() && - "Expected operands to be integers. 
Vector of int arguments should " - "already be unrolled."); - assert(RHS.getValueType().isScalarInteger() && - "Expected operands to be integers. Vector of int arguments should " - "already be unrolled."); - assert(LHS.getValueType() == RHS.getValueType() && - "Expected both operands to be the same type"); - - unsigned Scale = Node->getConstantOperandVal(2); EVT VT = LHS.getValueType(); - assert(Scale < VT.getScalarSizeInBits() && - "Expected scale to be less than the number of bits."); + unsigned Scale = Node->getConstantOperandVal(2); + bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT; + EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + unsigned VTSize = VT.getScalarSizeInBits(); + + if (!Scale) { + // [us]mul.fix(a, b, 0) -> mul(a, b) + if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) { + return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + } else if (Saturating && isOperationLegalOrCustom(ISD::SMULO, VT)) { + SDValue Result = + DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); + SDValue Product = Result.getValue(0); + SDValue Overflow = Result.getValue(1); + SDValue Zero = DAG.getConstant(0, dl, VT); + + APInt MinVal = APInt::getSignedMinValue(VTSize); + APInt MaxVal = APInt::getSignedMaxValue(VTSize); + SDValue SatMin = DAG.getConstant(MinVal, dl, VT); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + return DAG.getSelect(dl, VT, Overflow, Result, Product); + } + } - if (!Scale) - return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + bool Signed = + Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT; + assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) && + "Expected scale to be less than the number of bits if signed or at " + "most the number of bits if unsigned."); + assert(LHS.getValueType() == RHS.getValueType() && + "Expected both operands to be the same type"); // Get the upper and lower bits of the result. SDValue Lo, Hi; - if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) { - SDValue Result = - DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS); + unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI; + unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU; + if (isOperationLegalOrCustom(LoHiOp, VT)) { + SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS); Lo = Result.getValue(0); Hi = Result.getValue(1); - } else if (isOperationLegalOrCustom(ISD::MULHS, VT)) { + } else if (isOperationLegalOrCustom(HiOp, VT)) { Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); - Hi = DAG.getNode(ISD::MULHS, dl, VT, LHS, RHS); + Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS); + } else if (VT.isVector()) { + return SDValue(); } else { - report_fatal_error("Unable to expand signed fixed point multiplication."); + report_fatal_error("Unable to expand fixed point multiplication."); } + if (Scale == VTSize) + // Result is just the top half since we'd be shifting by the width of the + // operand. + return Hi; + // The result will need to be shifted right by the scale since both operands // are scaled. The result is given to us in 2 halves, so we only want part of // both in the result. 
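// A small worked example (not part of this diff) of the recombination the
// FSHR node below performs, in 8-bit Q4.4 fixed point: 2.5 * 1.5 == 3.75,
// i.e. (40 * 24) >> 4 == 60. fshr(Hi, Lo, Scale) is "shift the double-wide
// product right by Scale and keep the low W bits". Names here are ad hoc.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned W = 8, Scale = 4;
  uint8_t A = 40, B = 24;                    // 2.5 and 1.5 in Q4.4
  uint16_t Wide = uint16_t(A) * uint16_t(B); // 960 == 0x03C0
  uint8_t Lo = uint8_t(Wide), Hi = uint8_t(Wide >> W);
  uint8_t Res = uint8_t((Hi << (W - Scale)) | (Lo >> Scale)); // fshr
  assert(Res == uint8_t(Wide >> Scale) && Res == 60); // 3.75 in Q4.4
  return 0;
}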
EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout()); - Lo = DAG.getNode(ISD::SRL, dl, VT, Lo, DAG.getConstant(Scale, dl, ShiftTy)); - Hi = DAG.getNode( - ISD::SHL, dl, VT, Hi, - DAG.getConstant(VT.getScalarSizeInBits() - Scale, dl, ShiftTy)); - return DAG.getNode(ISD::OR, dl, VT, Lo, Hi); + SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo, + DAG.getConstant(Scale, dl, ShiftTy)); + if (!Saturating) + return Result; + + unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign + SDValue HiMask = + DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT); + SDValue LoMask = DAG.getConstant( + APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT); + APInt MaxVal = APInt::getSignedMaxValue(VTSize); + APInt MinVal = APInt::getSignedMinValue(VTSize); + + Result = DAG.getSelectCC(dl, Hi, LoMask, + DAG.getConstant(MaxVal, dl, VT), Result, + ISD::SETGT); + return DAG.getSelectCC(dl, Hi, HiMask, + DAG.getConstant(MinVal, dl, VT), Result, + ISD::SETLT); +} + +void TargetLowering::expandUADDSUBO( + SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const { + SDLoc dl(Node); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + bool IsAdd = Node->getOpcode() == ISD::UADDO; + + // If ADD/SUBCARRY is legal, use that instead. + unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY; + if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) { + SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1)); + SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(), + { LHS, RHS, CarryIn }); + Result = SDValue(NodeCarry.getNode(), 0); + Overflow = SDValue(NodeCarry.getNode(), 1); + return; + } + + Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl, + LHS.getValueType(), LHS, RHS); + + EVT ResultType = Node->getValueType(1); + EVT SetCCType = getSetCCResultType( + DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0)); + ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT; + SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC); + Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType); +} + +void TargetLowering::expandSADDSUBO( + SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const { + SDLoc dl(Node); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + bool IsAdd = Node->getOpcode() == ISD::SADDO; + + Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl, + LHS.getValueType(), LHS, RHS); + + EVT ResultType = Node->getValueType(1); + EVT OType = getSetCCResultType( + DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0)); + + // If SADDSAT/SSUBSAT is legal, compare results to detect overflow. + unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT; + if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) { + SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS); + SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE); + Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType); + return; + } + + SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); + + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Result >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + IsAdd ? 
ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType); +} + +bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, + SDValue &Overflow, SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + bool isSigned = Node->getOpcode() == ISD::SMULO; + + // For power-of-two multiplications we can use a simpler shift expansion. + if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) { + const APInt &C = RHSC->getAPIntValue(); + // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X } + if (C.isPowerOf2()) { + // smulo(x, signed_min) is same as umulo(x, signed_min). + bool UseArithShift = isSigned && !C.isMinSignedValue(); + EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy); + Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt); + Overflow = DAG.getSetCC(dl, SetCCVT, + DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL, + dl, VT, Result, ShiftAmt), + LHS, ISD::SETNE); + return true; + } + } + + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2); + if (VT.isVector()) + WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT, + VT.getVectorNumElements()); + + SDValue BottomHalf; + SDValue TopHalf; + static const unsigned Ops[2][3] = + { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND }, + { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }}; + if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) { + BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS); + } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) { + BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, + RHS); + TopHalf = BottomHalf.getValue(1); + } else if (isTypeLegal(WideVT)) { + LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); + RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); + SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); + BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul); + SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl, + getShiftAmountTy(WideVT, DAG.getDataLayout())); + TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt)); + } else { + if (VT.isVector()) + return false; + + // We can fall back to a libcall with an illegal type for the MUL if we + // have a libcall big enough. + // Also, we can fall back to a division in some cases, but that's a big + // performance hit in the general case. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (WideVT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (WideVT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (WideVT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (WideVT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); + + SDValue HiLHS; + SDValue HiRHS; + if (isSigned) { + // The high part is obtained by SRA'ing all but one of the bits of low + // part. 
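// A tiny illustration (not part of this diff) of the comment above: the high
// half of a sign-extended value is the sign bit replicated, which a single
// arithmetic shift by W - 1 produces. Arithmetic >> of negative values is
// assumed (guaranteed for the SRA node; implementation-defined in C++).
#include <cassert>
#include <cstdint>

int main() {
  int32_t Lo = -5;
  int32_t Hi = Lo >> 31; // all ones for negative Lo, all zeros otherwise
  assert(Hi == -1 && (7 >> 31) == 0);
  uint64_t Glued = (uint64_t(uint32_t(Hi)) << 32) | uint32_t(Lo);
  assert(int64_t(Glued) == int64_t(Lo)); // matches plain sign extension
  return 0;
}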
+ unsigned LoSize = VT.getSizeInBits(); + HiLHS = + DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize - 1, dl, + getPointerTy(DAG.getDataLayout()))); + HiRHS = + DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize - 1, dl, + getPointerTy(DAG.getDataLayout()))); + } else { + HiLHS = DAG.getConstant(0, dl, VT); + HiRHS = DAG.getConstant(0, dl, VT); + } + + // Here we're passing the 2 arguments explicitly as 4 arguments that are + // pre-lowered to the correct types. This all depends upon WideVT not + // being a legal type for the architecture and thus has to be split to + // two arguments. + SDValue Ret; + if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) { + // Halves of WideVT are packed into registers in different order + // depending on platform endianness. This is usually handled by + // the C calling convention, but we can't defer to it in + // the legalizer. + SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; + Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl, + /* doesNotReturn */ false, /* isReturnValueUsed */ true, + /* isPostTypeLegalization */ true).first; + } else { + SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; + Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl, + /* doesNotReturn */ false, /* isReturnValueUsed */ true, + /* isPostTypeLegalization */ true).first; + } + assert(Ret.getOpcode() == ISD::MERGE_VALUES && + "Ret value is a collection of constituent nodes holding result."); + if (DAG.getDataLayout().isLittleEndian()) { + // Same as above. + BottomHalf = Ret.getOperand(0); + TopHalf = Ret.getOperand(1); + } else { + BottomHalf = Ret.getOperand(1); + TopHalf = Ret.getOperand(0); + } + } + + Result = BottomHalf; + if (isSigned) { + SDValue ShiftAmt = DAG.getConstant( + VT.getScalarSizeInBits() - 1, dl, + getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout())); + SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt); + Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE); + } else { + Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, + DAG.getConstant(0, dl, VT), ISD::SETNE); + } + + // Truncate the result if SetCC returns a larger type than needed. + EVT RType = Node->getValueType(1); + if (RType.getSizeInBits() < Overflow.getValueSizeInBits()) + Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow); + + assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() && + "Unexpected result type for S/UMULO legalization"); + return true; +} + +SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const { + SDLoc dl(Node); + bool NoNaN = Node->getFlags().hasNoNaNs(); + unsigned BaseOpcode = 0; + switch (Node->getOpcode()) { + default: llvm_unreachable("Expected VECREDUCE opcode"); + case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break; + case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break; + case ISD::VECREDUCE_ADD: BaseOpcode = ISD::ADD; break; + case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break; + case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break; + case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break; + case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break; + case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break; + case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break; + case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break; + case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break; + case ISD::VECREDUCE_FMAX: + BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM; + break; + case ISD::VECREDUCE_FMIN: + BaseOpcode = NoNaN ? 
ISD::FMINNUM : ISD::FMINIMUM; + break; + } + + SDValue Op = Node->getOperand(0); + EVT VT = Op.getValueType(); + + // Try to use a shuffle reduction for power of two vectors. + if (VT.isPow2VectorType()) { + while (VT.getVectorNumElements() > 1) { + EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext()); + if (!isOperationLegalOrCustom(BaseOpcode, HalfVT)) + break; + + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(Op, dl); + Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi); + VT = HalfVT; + } + } + + EVT EltVT = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + + SmallVector<SDValue, 8> Ops; + DAG.ExtractVectorElements(Op, Ops, 0, NumElts); + + SDValue Res = Ops[0]; + for (unsigned i = 1; i < NumElts; i++) + Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags()); + + // Result type may be wider than element type. + if (EltVT != Node->getValueType(0)) + Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res); + return Res; } diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp index 3e12b32b12d4..17a4d76c4c80 100644 --- a/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/lib/CodeGen/ShadowStackGCLowering.cpp @@ -1,9 +1,8 @@ //===- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -313,7 +312,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { AtEntry.SetInsertPoint(IP->getParent(), IP); // Initialize the map pointer and load the current head of the shadow stack. - Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); + Instruction *CurrentHead = + AtEntry.CreateLoad(StackEntryTy->getPointerTo(), Head, "gc_currhead"); Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, StackEntry, 0, 1, "gc_frame.map"); AtEntry.CreateStore(FrameMap, EntryMapPtr); @@ -354,7 +354,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0, "gc_frame.next"); - Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); + Value *SavedHead = AtExit->CreateLoad(StackEntryTy->getPointerTo(), + EntryNextPtr2, "gc_savedhead"); AtExit->CreateStore(SavedHead, Head); } diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index d3454ca6ba6a..2db0ea570598 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -1,9 +1,8 @@ //===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -259,6 +258,15 @@ INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
RegScavenger *RS) const {
+ // This prevents premature stack popping when an indirect stack access
+ // occurs. It is overly aggressive for the moment.
+ // TODO: - Obvious non-stack loads and stores, such as global values,
+ // are known to not access the stack.
+ // - Further, data dependency and alias analysis can validate
+ // that loads and stores never derive from the stack pointer.
+ if (MI.mayLoadOrStore())
+ return true;
+
if (MI.getOpcode() == FrameSetupOpcode ||
MI.getOpcode() == FrameDestroyOpcode) {
LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 5d2669f5ae92..23e5ce0acae8 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -1,9 +1,8 @@
//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -40,15 +39,15 @@ class SjLjEHPrepare : public FunctionPass {
Type *doubleUnderDataTy;
Type *doubleUnderJBufTy;
Type *FunctionContextTy;
- Constant *RegisterFn;
- Constant *UnregisterFn;
- Constant *BuiltinSetupDispatchFn;
- Constant *FrameAddrFn;
- Constant *StackAddrFn;
- Constant *StackRestoreFn;
- Constant *LSDAAddrFn;
- Constant *CallSiteFn;
- Constant *FuncCtxFn;
+ FunctionCallee RegisterFn;
+ FunctionCallee UnregisterFn;
+ Function *BuiltinSetupDispatchFn;
+ Function *FrameAddrFn;
+ Function *StackAddrFn;
+ Function *StackRestoreFn;
+ Function *LSDAAddrFn;
+ Function *CallSiteFn;
+ Function *FuncCtxFn;
AllocaInst *FuncCtx;
public:
@@ -190,14 +189,16 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data");
// The exception values come back in context->__data[0].
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
0, 0, "exception_gep");
- Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
+ Value *ExnVal = Builder.CreateLoad(Int32Ty, ExceptionAddr, true, "exn_val");
ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
0, 1, "exn_selector_gep");
- Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
+ Value *SelVal =
+ Builder.CreateLoad(Int32Ty, SelectorAddr, true, "exn_selector_val");
substituteLPadValues(LPI, ExnVal, SelVal);
}
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index fccbb8ec91cb..9fff873324d0 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -1,9 +1,8 @@
//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License.
See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -23,7 +22,6 @@ INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE, "Slot index numbering", false, false) STATISTIC(NumLocalRenum, "Number of local renumberings"); -STATISTIC(NumGlobalRenum, "Number of global renumberings"); void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesAll(); @@ -95,7 +93,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { } // Sort the Idx2MBBMap - llvm::sort(idx2MBBMap, Idx2MBBCompare()); + llvm::sort(idx2MBBMap, less_first()); LLVM_DEBUG(mf->print(dbgs(), this)); @@ -145,20 +143,6 @@ void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) { } } -void SlotIndexes::renumberIndexes() { - // Renumber updates the index of every element of the index list. - LLVM_DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n"); - ++NumGlobalRenum; - - unsigned index = 0; - - for (IndexList::iterator I = indexList.begin(), E = indexList.end(); - I != E; ++I) { - I->setIndex(index); - index += SlotIndex::InstrDist; - } -} - // Renumber indexes locally after curItr was inserted, but failed to get a new // index. void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index f6786b30b21c..11452fdb747a 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -1,9 +1,8 @@ //===- SpillPlacement.cpp - Optimal Spill Code Placement ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index aa3ac444e0da..aa0e07ef92e3 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -1,9 +1,8 @@ //===- SpillPlacement.h - Optimal Spill Code Placement ---------*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index 330ee81342b6..66dabf78f873 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -1,9 +1,8 @@ //===- llvm/CodeGen/Spiller.h - Spiller -------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index d639f4475301..5c944fe3f6b3 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -1,9 +1,8 @@
//===- SplitKit.cpp - Toolkit for splitting live ranges -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -521,17 +520,18 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
.addReg(FromReg, 0, SubIdx);
BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (FirstCopy) {
- SlotIndexes &Indexes = *LIS.getSlotIndexes();
Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
} else {
CopyMI->bundleWithPred();
}
LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
DestLI.refineSubRanges(Allocator, LaneMask,
- [Def, &Allocator](LiveInterval::SubRange& SR) {
- SR.createDeadDef(Def, Allocator);
- });
+ [Def, &Allocator](LiveInterval::SubRange &SR) {
+ SR.createDeadDef(Def, Allocator);
+ },
+ Indexes, TRI);
return Def;
}
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index bcc8f8cf18bc..86ad3811e3ad 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -1,9 +1,8 @@
//===- SplitKit.h - Toolkit for splitting live ranges -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index eb8552915e2a..641b54205d62 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -1,9 +1,8 @@
//===- StackColoring.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1221,11 +1220,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Sort the slots according to their size. Place unused slots at the end.
// Use stable sort to guarantee deterministic code generation.
- std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
- [this](int LHS, int RHS) {
+ llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
// We use -1 to denote an uninteresting slot. Place these slots at the end.
- if (LHS == -1) return false; - if (RHS == -1) return true; + if (LHS == -1) + return false; + if (RHS == -1) + return true; // Sort according to size. return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); }); diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp index 00cf8070be5e..fb2abf3daa7f 100644 --- a/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -1,9 +1,8 @@ //===-- StackMapLivenessAnalysis.cpp - StackMap live Out Analysis ----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 0676fa2421e8..ae9401b89700 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -1,9 +1,8 @@ //===- StackMaps.cpp ------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 3b578c7391da..809960c7fdf9 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -1,9 +1,8 @@ //===- StackProtector.cpp - Stack Protector Insertion ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,6 +17,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" @@ -157,40 +157,6 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, return NeedsProtector; } -bool StackProtector::HasAddressTaken(const Instruction *AI) { - for (const User *U : AI->users()) { - if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (AI == SI->getValueOperand()) - return true; - } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) { - if (AI == SI->getOperand(0)) - return true; - } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { - // Ignore intrinsics that are not calls. TODO: Use isLoweredToCall(). 
- if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
- return true;
- } else if (isa<InvokeInst>(U)) {
- return true;
- } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) {
- if (HasAddressTaken(SI))
- return true;
- } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
- // Keep track of what PHI nodes we have already visited to ensure
- // they are only visited once.
- if (VisitedPHIs.insert(PN).second)
- if (HasAddressTaken(PN))
- return true;
- } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
- if (HasAddressTaken(GEP))
- return true;
- } else if (const BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
- if (HasAddressTaken(BI))
- return true;
- }
- }
- return false;
-}
-
/// Search for the first call to the llvm.stackprotector intrinsic and return it
/// if present.
static const CallInst *findStackProtectorIntrinsic(Function &F) {
@@ -298,7 +264,9 @@ bool StackProtector::RequiresStackProtector() {
continue;
}
- if (Strong && HasAddressTaken(AI)) {
+ if (Strong && PointerMayBeCaptured(AI,
+ /* ReturnCaptures */ false,
+ /* StoreCaptures */ true)) {
++NumAddrTaken;
Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
ORE.emit([&]() {
@@ -323,7 +291,7 @@ static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
IRBuilder<> &B,
bool *SupportsSelectionDAGSP = nullptr) {
if (Value *Guard = TLI->getIRStackGuard(B))
- return B.CreateLoad(Guard, true, "StackGuard");
+ return B.CreateLoad(B.getInt8PtrTy(), Guard, true, "StackGuard");
// Use SelectionDAG SSP handling, since there isn't an IR guard.
//
@@ -414,15 +382,14 @@ bool StackProtector::InsertStackProtectors() {
// Generate epilogue instrumentation. The epilogue instrumentation can be
// function-based or inlined depending on which mechanism the target is
// providing.
- if (Value* GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
+ if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
// Generate the function-based epilogue instrumentation.
// The target provides a guard check function, generate a call to it.
IRBuilder<> B(RI);
- LoadInst *Guard = B.CreateLoad(AI, true, "Guard");
+ LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard");
CallInst *Call = B.CreateCall(GuardCheck, {Guard});
- llvm::Function *Function = cast<llvm::Function>(GuardCheck);
- Call->setAttributes(Function->getAttributes());
- Call->setCallingConv(Function->getCallingConv());
+ Call->setAttributes(GuardCheck->getAttributes());
+ Call->setCallingConv(GuardCheck->getCallingConv());
} else {
// Generate the epilogue with inline instrumentation.
// If we do not support SelectionDAG based tail calls, generate IR level
@@ -474,7 +441,7 @@ bool StackProtector::InsertStackProtectors() {
// Generate the stack protector instructions in the old basic block.
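// A plain-C++ sketch (not part of this diff) of the inline epilogue check
// assembled below: reload the guard and its stack copy, compare, and take the
// fail path on mismatch. StackGuard is a stand-in value; only
// __stack_chk_fail's abort-like behavior is being imitated.
#include <cstdint>
#include <cstdio>
#include <cstdlib>

static const uintptr_t StackGuard = 0x595E9FBD; // illustrative guard value

int main() {
  uintptr_t GuardOnStack = StackGuard; // written by the prologue
  // ... function body that could overflow a local buffer ...
  if (GuardOnStack != StackGuard) {    // the ICmpEQ built below
    std::fputs("stack smashing detected\n", stderr);
    std::abort();                      // the FailBB path
  }
  return 0;
}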
IRBuilder<> B(BB);
Value *Guard = getStackGuard(TLI, M, B);
- LoadInst *LI2 = B.CreateLoad(AI, true);
+ LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true);
Value *Cmp = B.CreateICmpEQ(Guard, LI2);
auto SuccessProb =
BranchProbabilityInfo::getBranchProbStackProtector(true);
@@ -500,14 +467,13 @@ BasicBlock *StackProtector::CreateFailBB() {
IRBuilder<> B(FailBB);
B.SetCurrentDebugLocation(DebugLoc::get(0, 0, F->getSubprogram()));
if (Trip.isOSOpenBSD()) {
- Constant *StackChkFail =
- M->getOrInsertFunction("__stack_smash_handler",
- Type::getVoidTy(Context),
- Type::getInt8PtrTy(Context));
+ FunctionCallee StackChkFail = M->getOrInsertFunction(
+ "__stack_smash_handler", Type::getVoidTy(Context),
+ Type::getInt8PtrTy(Context));
B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
- Constant *StackChkFail =
+ FunctionCallee StackChkFail =
M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context));
B.CreateCall(StackChkFail, {});
@@ -517,7 +483,7 @@ BasicBlock *StackProtector::CreateFailBB() {
}
bool StackProtector::shouldEmitSDCheck(const BasicBlock &BB) const {
- return HasPrologue && !HasIRCheck && dyn_cast<ReturnInst>(BB.getTerminator());
+ return HasPrologue && !HasIRCheck && isa<ReturnInst>(BB.getTerminator());
}
void StackProtector::copyToMachineFrameInfo(MachineFrameInfo &MFI) const {
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index d8c6a249e4da..99b533e10b87 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -1,9 +1,8 @@
//===- StackSlotColoring.cpp - Stack slot coloring pass. ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -243,7 +242,7 @@ void StackSlotColoring::InitializeSlots() {
LLVM_DEBUG(dbgs() << '\n');
// Sort them by weight.
- std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+ llvm::stable_sort(SSIntervals, IntervalSorter());
NextColors.resize(AllColors.size());
@@ -348,7 +347,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
li->weight = SlotWeights[SS];
}
// Sort them by new weight.
- std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+ llvm::stable_sort(SSIntervals, IntervalSorter());
#ifndef NDEBUG
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
diff --git a/lib/CodeGen/SwiftErrorValueTracking.cpp b/lib/CodeGen/SwiftErrorValueTracking.cpp
new file mode 100644
index 000000000000..96821cadb1b6
--- /dev/null
+++ b/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -0,0 +1,312 @@
+//===-- SwiftErrorValueTracking.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a limited mem2reg-like analysis to promote uses of function
+// arguments and allocas marked with swifterror from memory into virtual
+// registers tracked by this class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+
+Register SwiftErrorValueTracking::getOrCreateVReg(const MachineBasicBlock *MBB,
+ const Value *Val) {
+ auto Key = std::make_pair(MBB, Val);
+ auto It = VRegDefMap.find(Key);
+ // If this is the first use of this swifterror value in this basic block,
+ // create a new virtual register.
+ // After we have processed all basic blocks we will satisfy this "upwards
+ // exposed use" by inserting a copy or phi at the beginning of this block.
+ if (It == VRegDefMap.end()) {
+ auto &DL = MF->getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ auto VReg = MF->getRegInfo().createVirtualRegister(RC);
+ VRegDefMap[Key] = VReg;
+ VRegUpwardsUse[Key] = VReg;
+ return VReg;
+ } else
+ return It->second;
+}
+
+void SwiftErrorValueTracking::setCurrentVReg(const MachineBasicBlock *MBB,
+ const Value *Val, Register VReg) {
+ VRegDefMap[std::make_pair(MBB, Val)] = VReg;
+}
+
+Register SwiftErrorValueTracking::getOrCreateVRegDefAt(
+ const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+ auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true);
+ auto It = VRegDefUses.find(Key);
+ if (It != VRegDefUses.end())
+ return It->second;
+
+ auto &DL = MF->getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ Register VReg = MF->getRegInfo().createVirtualRegister(RC);
+ VRegDefUses[Key] = VReg;
+ setCurrentVReg(MBB, Val, VReg);
+ return VReg;
+}
+
+Register SwiftErrorValueTracking::getOrCreateVRegUseAt(
+ const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+ auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false);
+ auto It = VRegDefUses.find(Key);
+ if (It != VRegDefUses.end())
+ return It->second;
+
+ Register VReg = getOrCreateVReg(MBB, Val);
+ VRegDefUses[Key] = VReg;
+ return VReg;
+}
+
+/// Set up SwiftErrorVals by going through the function. If the function has a
+/// swifterror argument, it will be the first entry.
+void SwiftErrorValueTracking::setFunction(MachineFunction &mf) {
+ MF = &mf;
+ Fn = &MF->getFunction();
+ TLI = MF->getSubtarget().getTargetLowering();
+ TII = MF->getSubtarget().getInstrInfo();
+
+ if (!TLI->supportSwiftError())
+ return;
+
+ SwiftErrorVals.clear();
+ VRegDefMap.clear();
+ VRegUpwardsUse.clear();
+ VRegDefUses.clear();
+ SwiftErrorArg = nullptr;
+
+ // Check if function has a swifterror argument.
+ bool HaveSeenSwiftErrorArg = false;
+ for (Function::const_arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end();
+ AI != AE; ++AI)
+ if (AI->hasSwiftErrorAttr()) {
+ assert(!HaveSeenSwiftErrorArg &&
+ "Must have only one swifterror parameter");
+ (void)HaveSeenSwiftErrorArg; // silence warning.
+ HaveSeenSwiftErrorArg = true;
+ SwiftErrorArg = &*AI;
+ SwiftErrorVals.push_back(&*AI);
+ }
+
+ for (const auto &LLVMBB : *Fn)
+ for (const auto &Inst : LLVMBB) {
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst))
+ if (Alloca->isSwiftError())
+ SwiftErrorVals.push_back(Alloca);
+ }
+}
+
+bool SwiftErrorValueTracking::createEntriesInEntryBlock(DebugLoc DbgLoc) {
+ if (!TLI->supportSwiftError())
+ return false;
+
+ // We only need to do this when we have a swifterror parameter or a
+ // swifterror alloca.
+ if (SwiftErrorVals.empty())
+ return false;
+
+ MachineBasicBlock *MBB = &*MF->begin();
+ auto &DL = MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ bool Inserted = false;
+ for (const auto *SwiftErrorVal : SwiftErrorVals) {
+ // We will always generate a copy from the argument. It is always used at
+ // least by the 'return' of the swifterror.
+ if (SwiftErrorArg && SwiftErrorArg == SwiftErrorVal)
+ continue;
+ Register VReg = MF->getRegInfo().createVirtualRegister(RC);
+ // Assign Undef to VReg. We construct MI directly to make sure it works
+ // with FastISel.
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DbgLoc,
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+
+ setCurrentVReg(MBB, SwiftErrorVal, VReg);
+ Inserted = true;
+ }
+
+ return Inserted;
+}
+
+/// Propagate swifterror values through the machine function CFG.
+void SwiftErrorValueTracking::propagateVRegs() {
+ if (!TLI->supportSwiftError())
+ return;
+
+ // We only need to do this when we have a swifterror parameter or a
+ // swifterror alloca.
+ if (SwiftErrorVals.empty())
+ return;
+
+ // For each machine basic block in reverse post order.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
+ for (MachineBasicBlock *MBB : RPOT) {
+ // For each swifterror value in the function.
+ for (const auto *SwiftErrorVal : SwiftErrorVals) {
+ auto Key = std::make_pair(MBB, SwiftErrorVal);
+ auto UUseIt = VRegUpwardsUse.find(Key);
+ auto VRegDefIt = VRegDefMap.find(Key);
+ bool UpwardsUse = UUseIt != VRegUpwardsUse.end();
+ Register UUseVReg = UpwardsUse ? UUseIt->second : Register();
+ bool DownwardDef = VRegDefIt != VRegDefMap.end();
+ assert(!(UpwardsUse && !DownwardDef) &&
+ "We can't have an upwards use but no downwards def");
+
+ // If there is no upwards exposed use and an entry for the swifterror in
+ // the def map for this value we don't need to do anything: We already
+ // have a downward def for this basic block.
+ if (!UpwardsUse && DownwardDef)
+ continue;
+
+ // Otherwise we either have an upwards exposed use vreg that we need to
+ // materialize or need to forward the downward def from predecessors.
+
+ // Check whether we have a single vreg def from all predecessors.
+ // Otherwise we need a phi.
+ SmallVector<std::pair<MachineBasicBlock *, Register>, 4> VRegs;
+ SmallSet<const MachineBasicBlock *, 8> Visited;
+ for (auto *Pred : MBB->predecessors()) {
+ if (!Visited.insert(Pred).second)
+ continue;
+ VRegs.push_back(std::make_pair(
+ Pred, getOrCreateVReg(Pred, SwiftErrorVal)));
+ if (Pred != MBB)
+ continue;
+ // We have a self-edge.
+ // If there was no upwards use in this basic block there is now one: the
+ // phi needs to use itself.
+ if (!UpwardsUse) {
+ UpwardsUse = true;
+ UUseIt = VRegUpwardsUse.find(Key);
+ assert(UUseIt != VRegUpwardsUse.end());
+ UUseVReg = UUseIt->second;
+ }
+ }
+
+ // We need a phi node if we have more than one predecessor with different
+ // downward defs.
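// The needPHI computation below only asks whether any predecessor disagrees
// with the first one. A standalone miniature (not part of this diff), with
// plain ints standing in for the (block, vreg) pairs:
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Same{7, 7, 7}, Mixed{7, 9, 7};
  auto NeedPHI = [](const std::vector<int> &Defs) {
    return std::find_if(Defs.begin(), Defs.end(),
                        [&](int R) { return R != Defs[0]; }) != Defs.end();
  };
  assert(!NeedPHI(Same)); // one incoming vreg: just forward it
  assert(NeedPHI(Mixed)); // differing incoming vregs: build a PHI
  return 0;
}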
+ bool needPHI =
+ VRegs.size() >= 1 &&
+ std::find_if(
+ VRegs.begin(), VRegs.end(),
+ [&](const std::pair<const MachineBasicBlock *, Register> &V)
+ -> bool { return V.second != VRegs[0].second; }) !=
+ VRegs.end();
+
+ // If there is no upwards exposed use and we don't need a phi just
+ // forward the swifterror vreg from the predecessor(s).
+ if (!UpwardsUse && !needPHI) {
+ assert(!VRegs.empty() &&
+ "No predecessors? The entry block should bail out earlier");
+ // Just forward the swifterror vreg from the predecessor(s).
+ setCurrentVReg(MBB, SwiftErrorVal, VRegs[0].second);
+ continue;
+ }
+
+ auto DLoc = isa<Instruction>(SwiftErrorVal)
+ ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
+ : DebugLoc();
+ const auto *TII = MF->getSubtarget().getInstrInfo();
+
+ // If we don't need a phi create a copy to the upward exposed vreg.
+ if (!needPHI) {
+ assert(UpwardsUse);
+ assert(!VRegs.empty() &&
+ "No predecessors? Is the Calling Convention correct?");
+ Register DestReg = UUseVReg;
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
+ DestReg)
+ .addReg(VRegs[0].second);
+ continue;
+ }
+
+ // We need a phi: if there is an upwards exposed use we already have a
+ // destination virtual register number otherwise we generate a new one.
+ auto &DL = MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ Register PHIVReg =
+ UpwardsUse ? UUseVReg : MF->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder PHI =
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc,
+ TII->get(TargetOpcode::PHI), PHIVReg);
+ for (auto BBRegPair : VRegs) {
+ PHI.addReg(BBRegPair.second).addMBB(BBRegPair.first);
+ }
+
+ // We did not have a definition in this block before: store the phi's vreg
+ // as this block's downward exposed def.
+ if (!UpwardsUse)
+ setCurrentVReg(MBB, SwiftErrorVal, PHIVReg);
+ }
+ }
+}
+
+void SwiftErrorValueTracking::preassignVRegs(
+ MachineBasicBlock *MBB, BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End) {
+ if (!TLI->supportSwiftError() || SwiftErrorVals.empty())
+ return;
+
+ // Iterate over the instructions and assign vregs to swifterror defs and
+ // uses.
+ for (auto It = Begin; It != End; ++It) {
+ ImmutableCallSite CS(&*It);
+ if (CS) {
+ // A call-site with a swifterror argument is both use and def.
+ const Value *SwiftErrorAddr = nullptr;
+ for (auto &Arg : CS.args()) {
+ if (!Arg->isSwiftError())
+ continue;
+ // Use of swifterror.
+ assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments");
+ SwiftErrorAddr = &*Arg;
+ assert(SwiftErrorAddr->isSwiftError() &&
+ "Must have a swifterror value argument");
+ getOrCreateVRegUseAt(&*It, MBB, SwiftErrorAddr);
+ }
+ if (!SwiftErrorAddr)
+ continue;
+
+ // Def of swifterror.
+ getOrCreateVRegDefAt(&*It, MBB, SwiftErrorAddr);
+
+ // A load is a use.
+ } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) {
+ const Value *V = LI->getOperand(0);
+ if (!V->isSwiftError())
+ continue;
+
+ getOrCreateVRegUseAt(LI, MBB, V);
+
+ // A store is a def.
+ } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) {
+ const Value *SwiftErrorAddr = SI->getOperand(1);
+ if (!SwiftErrorAddr->isSwiftError())
+ continue;
+
+ // Def of swifterror.
+ getOrCreateVRegDefAt(&*It, MBB, SwiftErrorAddr);
+
+ // A return in a swifterror returning function is a use.
+ } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) { + const Function *F = R->getParent()->getParent(); + if (!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + continue; + + getOrCreateVRegUseAt(R, MBB, SwiftErrorArg); + } + } +} diff --git a/lib/CodeGen/SwitchLoweringUtils.cpp b/lib/CodeGen/SwitchLoweringUtils.cpp new file mode 100644 index 000000000000..83acf7f80715 --- /dev/null +++ b/lib/CodeGen/SwitchLoweringUtils.cpp @@ -0,0 +1,489 @@ +//===- SwitchLoweringUtils.cpp - Switch Lowering --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains switch inst lowering optimizations and utilities for +// codegen, so that it can be used for both SelectionDAG and GlobalISel. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/SwitchLoweringUtils.h" + +using namespace llvm; +using namespace SwitchCG; + +uint64_t SwitchCG::getJumpTableRange(const CaseClusterVector &Clusters, + unsigned First, unsigned Last) { + assert(Last >= First); + const APInt &LowCase = Clusters[First].Low->getValue(); + const APInt &HighCase = Clusters[Last].High->getValue(); + assert(LowCase.getBitWidth() == HighCase.getBitWidth()); + + // FIXME: A range of consecutive cases has 100% density, but only requires one + // comparison to lower. We should discriminate against such consecutive ranges + // in jump tables. + return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1; +} + +uint64_t +SwitchCG::getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases, + unsigned First, unsigned Last) { + assert(Last >= First); + assert(TotalCases[Last] >= TotalCases[First]); + uint64_t NumCases = + TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); + return NumCases; +} + +void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, + const SwitchInst *SI, + MachineBasicBlock *DefaultMBB) { +#ifndef NDEBUG + // Clusters must be non-empty, sorted, and only contain Range clusters. + assert(!Clusters.empty()); + for (CaseCluster &C : Clusters) + assert(C.Kind == CC_Range); + for (unsigned i = 1, e = Clusters.size(); i < e; ++i) + assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue())); +#endif + + assert(TLI && "TLI not set!"); + if (!TLI->areJTsAllowed(SI->getParent()->getParent())) + return; + + const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(); + const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; + + // Bail if not enough cases. + const int64_t N = Clusters.size(); + if (N < 2 || N < MinJumpTableEntries) + return; + + // Accumulated number of cases in each cluster and those prior to it. 
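// A standalone illustration (not part of this diff) of the bookkeeping built
// below: for clusters covering {0..2} and {100}, the running TotalCases array
// is {3, 4}, so the case count of any [First..Last] slice is a constant-time
// query, while one table over the whole span would need 100 - 0 + 1 == 101
// slots for only 4 real cases -- the density isSuitableForJumpTable weighs.
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint64_t> Lo{0, 100}, Hi{2, 100};
  std::vector<uint64_t> TotalCases(Lo.size());
  for (unsigned i = 0; i < Lo.size(); ++i) {
    TotalCases[i] = Hi[i] - Lo[i] + 1;
    if (i != 0)
      TotalCases[i] += TotalCases[i - 1];
  }
  uint64_t NumCases = TotalCases[1];  // cases in clusters [0..1]
  uint64_t Range = Hi[1] - Lo[0] + 1; // slots a single table would span
  assert(NumCases == 4 && Range == 101);
  return 0;
}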
+ SmallVector<unsigned, 8> TotalCases(N);
+ for (unsigned i = 0; i < N; ++i) {
+ const APInt &Hi = Clusters[i].High->getValue();
+ const APInt &Lo = Clusters[i].Low->getValue();
+ TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
+ if (i != 0)
+ TotalCases[i] += TotalCases[i - 1];
+ }
+
+ uint64_t Range = getJumpTableRange(Clusters, 0, N - 1);
+ uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+
+ // Cheap case: the whole range may be suitable for a jump table.
+ if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) {
+ CaseCluster JTCluster;
+ if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
+ Clusters[0] = JTCluster;
+ Clusters.resize(1);
+ return;
+ }
+ }
+
+ // The algorithm below is not suitable for -O0.
+ if (TM->getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // Split Clusters into the minimum number of dense partitions. The algorithm
+ // uses the same idea as Kannan & Proebsting "Correction to 'Producing Good
+ // Code for the Case Statement'" (1994), but builds the MinPartitions array
+ // in reverse order to make it easier to reconstruct the partitions in
+ // ascending order. In the choice between two optimal partitionings, it
+ // picks the one which yields more jump tables.
+
+ // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
+ SmallVector<unsigned, 8> MinPartitions(N);
+ // LastElement[i] is the last element of the partition starting at i.
+ SmallVector<unsigned, 8> LastElement(N);
+ // PartitionsScore[i] is used to break ties when choosing between two
+ // partitionings resulting in the same number of partitions.
+ SmallVector<unsigned, 8> PartitionsScore(N);
+ // For PartitionsScore, a small number of comparisons is considered as good
+ // as a jump table and a single comparison is considered better than a jump
+ // table.
+ enum PartitionScores : unsigned {
+ NoTable = 0,
+ Table = 1,
+ FewCases = 1,
+ SingleCase = 2
+ };
+
+ // Base case: There is only one way to partition Clusters[N-1].
+ MinPartitions[N - 1] = 1;
+ LastElement[N - 1] = N - 1;
+ PartitionsScore[N - 1] = PartitionScores::SingleCase;
+
+ // Note: loop indexes are signed to avoid underflow.
+ for (int64_t i = N - 2; i >= 0; i--) {
+ // Find optimal partitioning of Clusters[i..N-1].
+ // Baseline: Put Clusters[i] into a partition on its own.
+ MinPartitions[i] = MinPartitions[i + 1] + 1;
+ LastElement[i] = i;
+ PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase;
+
+ // Search for a solution that results in fewer partitions.
+ for (int64_t j = N - 1; j > i; j--) {
+ // Try building a partition from Clusters[i..j].
+ Range = getJumpTableRange(Clusters, i, j);
+ NumCases = getJumpTableNumCases(TotalCases, i, j);
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+
+ if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) {
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
+ int64_t NumEntries = j - i + 1;
+
+ if (NumEntries == 1)
+ Score += PartitionScores::SingleCase;
+ else if (NumEntries <= SmallNumberOfEntries)
+ Score += PartitionScores::FewCases;
+ else if (NumEntries >= MinJumpTableEntries)
+ Score += PartitionScores::Table;
+
+ // If this leads to fewer partitions, or to the same number of
+ // partitions with a better score, it is a better partitioning.
+ if (NumPartitions < MinPartitions[i] || + (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) { + MinPartitions[i] = NumPartitions; + LastElement[i] = j; + PartitionsScore[i] = Score; + } + } + } + } + + // Iterate over the partitions, replacing some with jump tables in-place. + unsigned DstIndex = 0; + for (unsigned First = 0, Last; First < N; First = Last + 1) { + Last = LastElement[First]; + assert(Last >= First); + assert(DstIndex <= First); + unsigned NumClusters = Last - First + 1; + + CaseCluster JTCluster; + if (NumClusters >= MinJumpTableEntries && + buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { + Clusters[DstIndex++] = JTCluster; + } else { + for (unsigned I = First; I <= Last; ++I) + std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); + } + } + Clusters.resize(DstIndex); +} + +bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters, + unsigned First, unsigned Last, + const SwitchInst *SI, + MachineBasicBlock *DefaultMBB, + CaseCluster &JTCluster) { + assert(First <= Last); + + auto Prob = BranchProbability::getZero(); + unsigned NumCmps = 0; + std::vector<MachineBasicBlock*> Table; + DenseMap<MachineBasicBlock*, BranchProbability> JTProbs; + + // Initialize probabilities in JTProbs. + for (unsigned I = First; I <= Last; ++I) + JTProbs[Clusters[I].MBB] = BranchProbability::getZero(); + + for (unsigned I = First; I <= Last; ++I) { + assert(Clusters[I].Kind == CC_Range); + Prob += Clusters[I].Prob; + const APInt &Low = Clusters[I].Low->getValue(); + const APInt &High = Clusters[I].High->getValue(); + NumCmps += (Low == High) ? 1 : 2; + if (I != First) { + // Fill the gap between this and the previous cluster. + const APInt &PreviousHigh = Clusters[I - 1].High->getValue(); + assert(PreviousHigh.slt(Low)); + uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; + for (uint64_t J = 0; J < Gap; J++) + Table.push_back(DefaultMBB); + } + uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; + for (uint64_t J = 0; J < ClusterSize; ++J) + Table.push_back(Clusters[I].MBB); + JTProbs[Clusters[I].MBB] += Clusters[I].Prob; + } + + unsigned NumDests = JTProbs.size(); + if (TLI->isSuitableForBitTests(NumDests, NumCmps, + Clusters[First].Low->getValue(), + Clusters[Last].High->getValue(), *DL)) { + // Clusters[First..Last] should be lowered as bit tests instead. + return false; + } + + // Create the MBB that will load from and jump through the table. + // Note: We create it here, but it's not inserted into the function yet. + MachineFunction *CurMF = FuncInfo.MF; + MachineBasicBlock *JumpTableMBB = + CurMF->CreateMachineBasicBlock(SI->getParent()); + + // Add successors. Note: use table order for determinism. + SmallPtrSet<MachineBasicBlock *, 8> Done; + for (MachineBasicBlock *Succ : Table) { + if (Done.count(Succ)) + continue; + addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]); + Done.insert(Succ); + } + JumpTableMBB->normalizeSuccProbs(); + + unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI->getJumpTableEncoding()) + ->createJumpTableIndex(Table); + + // Set up the jump table info. 
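+  // The index register (-1U) and the default/header blocks (nullptr) are not
+  // known at this point; they are filled in later, when the cluster is
+  // actually lowered.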
+ JumpTable JT(-1U, JTI, JumpTableMBB, nullptr); + JumpTableHeader JTH(Clusters[First].Low->getValue(), + Clusters[Last].High->getValue(), SI->getCondition(), + nullptr, false); + JTCases.emplace_back(std::move(JTH), std::move(JT)); + + JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, + JTCases.size() - 1, Prob); + return true; +} + +void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters, + const SwitchInst *SI) { + // Partition Clusters into as few subsets as possible, where each subset has a + // range that fits in a machine word and has <= 3 unique destinations. + +#ifndef NDEBUG + // Clusters must be sorted and contain Range or JumpTable clusters. + assert(!Clusters.empty()); + assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable); + for (const CaseCluster &C : Clusters) + assert(C.Kind == CC_Range || C.Kind == CC_JumpTable); + for (unsigned i = 1; i < Clusters.size(); ++i) + assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); +#endif + + // The algorithm below is not suitable for -O0. + if (TM->getOptLevel() == CodeGenOpt::None) + return; + + // If target does not have legal shift left, do not emit bit tests at all. + EVT PTy = TLI->getPointerTy(*DL); + if (!TLI->isOperationLegal(ISD::SHL, PTy)) + return; + + int BitWidth = PTy.getSizeInBits(); + const int64_t N = Clusters.size(); + + // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. + SmallVector<unsigned, 8> MinPartitions(N); + // LastElement[i] is the last element of the partition starting at i. + SmallVector<unsigned, 8> LastElement(N); + + // FIXME: This might not be the best algorithm for finding bit test clusters. + + // Base case: There is only one way to partition Clusters[N-1]. + MinPartitions[N - 1] = 1; + LastElement[N - 1] = N - 1; + + // Note: loop indexes are signed to avoid underflow. + for (int64_t i = N - 2; i >= 0; --i) { + // Find optimal partitioning of Clusters[i..N-1]. + // Baseline: Put Clusters[i] into a partition on its own. + MinPartitions[i] = MinPartitions[i + 1] + 1; + LastElement[i] = i; + + // Search for a solution that results in fewer partitions. + // Note: the search is limited by BitWidth, reducing time complexity. + for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) { + // Try building a partition from Clusters[i..j]. + + // Check the range. + if (!TLI->rangeFitsInWord(Clusters[i].Low->getValue(), + Clusters[j].High->getValue(), *DL)) + continue; + + // Check nbr of destinations and cluster types. + // FIXME: This works, but doesn't seem very efficient. + bool RangesOnly = true; + BitVector Dests(FuncInfo.MF->getNumBlockIDs()); + for (int64_t k = i; k <= j; k++) { + if (Clusters[k].Kind != CC_Range) { + RangesOnly = false; + break; + } + Dests.set(Clusters[k].MBB->getNumber()); + } + if (!RangesOnly || Dests.count() > 3) + break; + + // Check if it's a better partition. + unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); + if (NumPartitions < MinPartitions[i]) { + // Found a better partition. + MinPartitions[i] = NumPartitions; + LastElement[i] = j; + } + } + } + + // Iterate over the partitions, replacing with bit-test clusters in-place. 
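+  // As in findJumpTables() above, clusters that are not merged into a
+  // bit-test cluster are compacted towards the front of the vector with
+  // memmove.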
+ unsigned DstIndex = 0; + for (unsigned First = 0, Last; First < N; First = Last + 1) { + Last = LastElement[First]; + assert(First <= Last); + assert(DstIndex <= First); + + CaseCluster BitTestCluster; + if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) { + Clusters[DstIndex++] = BitTestCluster; + } else { + size_t NumClusters = Last - First + 1; + std::memmove(&Clusters[DstIndex], &Clusters[First], + sizeof(Clusters[0]) * NumClusters); + DstIndex += NumClusters; + } + } + Clusters.resize(DstIndex); +} + +bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters, + unsigned First, unsigned Last, + const SwitchInst *SI, + CaseCluster &BTCluster) { + assert(First <= Last); + if (First == Last) + return false; + + BitVector Dests(FuncInfo.MF->getNumBlockIDs()); + unsigned NumCmps = 0; + for (int64_t I = First; I <= Last; ++I) { + assert(Clusters[I].Kind == CC_Range); + Dests.set(Clusters[I].MBB->getNumber()); + NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2; + } + unsigned NumDests = Dests.count(); + + APInt Low = Clusters[First].Low->getValue(); + APInt High = Clusters[Last].High->getValue(); + assert(Low.slt(High)); + + if (!TLI->isSuitableForBitTests(NumDests, NumCmps, Low, High, *DL)) + return false; + + APInt LowBound; + APInt CmpRange; + + const int BitWidth = TLI->getPointerTy(*DL).getSizeInBits(); + assert(TLI->rangeFitsInWord(Low, High, *DL) && + "Case range must fit in bit mask!"); + + // Check if the clusters cover a contiguous range such that no value in the + // range will jump to the default statement. + bool ContiguousRange = true; + for (int64_t I = First + 1; I <= Last; ++I) { + if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { + ContiguousRange = false; + break; + } + } + + if (Low.isStrictlyPositive() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a word without having + // to subtract minValue. In this case, we can optimize away the subtraction. + LowBound = APInt::getNullValue(Low.getBitWidth()); + CmpRange = High; + ContiguousRange = false; + } else { + LowBound = Low; + CmpRange = High - Low; + } + + CaseBitsVector CBV; + auto TotalProb = BranchProbability::getZero(); + for (unsigned i = First; i <= Last; ++i) { + // Find the CaseBits for this destination. + unsigned j; + for (j = 0; j < CBV.size(); ++j) + if (CBV[j].BB == Clusters[i].MBB) + break; + if (j == CBV.size()) + CBV.push_back( + CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero())); + CaseBits *CB = &CBV[j]; + + // Update Mask, Bits and ExtraProb. + uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); + uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); + assert(Hi >= Lo && Hi < 64 && "Invalid bit case!"); + CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo; + CB->Bits += Hi - Lo + 1; + CB->ExtraProb += Clusters[i].Prob; + TotalProb += Clusters[i].Prob; + } + + BitTestInfo BTI; + llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) { + // Sort by probability first, number of bits second, bit mask third. 
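+    // Sorting by descending probability means the most likely destination is
+    // tested first; the final mask comparison just keeps the order
+    // deterministic.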
+ if (a.ExtraProb != b.ExtraProb) + return a.ExtraProb > b.ExtraProb; + if (a.Bits != b.Bits) + return a.Bits > b.Bits; + return a.Mask < b.Mask; + }); + + for (auto &CB : CBV) { + MachineBasicBlock *BitTestBB = + FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); + BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb)); + } + BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), + SI->getCondition(), -1U, MVT::Other, false, + ContiguousRange, nullptr, nullptr, std::move(BTI), + TotalProb); + + BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, + BitTestCases.size() - 1, TotalProb); + return true; +} + +void SwitchCG::sortAndRangeify(CaseClusterVector &Clusters) { +#ifndef NDEBUG + for (const CaseCluster &CC : Clusters) + assert(CC.Low == CC.High && "Input clusters must be single-case"); +#endif + + llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) { + return a.Low->getValue().slt(b.Low->getValue()); + }); + + // Merge adjacent clusters with the same destination. + const unsigned N = Clusters.size(); + unsigned DstIndex = 0; + for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) { + CaseCluster &CC = Clusters[SrcIndex]; + const ConstantInt *CaseVal = CC.Low; + MachineBasicBlock *Succ = CC.MBB; + + if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ && + (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) { + // If this case has the same successor and is a neighbour, merge it into + // the previous cluster. + Clusters[DstIndex - 1].High = CaseVal; + Clusters[DstIndex - 1].Prob += CC.Prob; + } else { + std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], + sizeof(Clusters[SrcIndex])); + } + } + Clusters.resize(DstIndex); +} diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 25cd7802264e..ba348b4a9d41 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -1,9 +1,8 @@ //===- TailDuplication.cpp - Duplicate blocks into predecessors' tails ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp index b118c176a897..a0590a8a6cc6 100644 --- a/lib/CodeGen/TailDuplicator.cpp +++ b/lib/CodeGen/TailDuplicator.cpp @@ -1,9 +1,8 @@ //===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -435,7 +434,7 @@ void TailDuplicator::duplicateInstruction( if (NewRC == nullptr) NewRC = OrigRC; unsigned NewReg = MRI->createVirtualRegister(NewRC); - BuildMI(*PredBB, MI, MI->getDebugLoc(), + BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(), TII->get(TargetOpcode::COPY), NewReg) .addReg(VI->second.Reg, 0, VI->second.SubReg); LocalVRMap.erase(VI); @@ -558,7 +557,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, unsigned MaxDuplicateCount; if (TailDupSize == 0 && TailDuplicateSize.getNumOccurrences() == 0 && - MF->getFunction().optForSize()) + MF->getFunction().hasOptSize()) MaxDuplicateCount = 1; else if (TailDupSize == 0) MaxDuplicateCount = TailDuplicateSize; @@ -857,11 +856,6 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, } appendCopies(PredBB, CopyInfos, Copies); - // Simplify - MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; - SmallVector<MachineOperand, 4> PredCond; - TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond); - NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index cf78fb5a1f12..9c4483cb240d 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -1,9 +1,8 @@ //===- TargetFrameLoweringImpl.cpp - Implement target frame interface ------==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 2a17af391105..868617ffe14d 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -1,9 +1,8 @@ //===-- TargetInstrInfo.cpp - Target Instruction Information --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -86,11 +85,13 @@ static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) { /// simple--i.e. not a logical or arithmetic expression--size values without /// the optional fill value. This is primarily used for creating arbitrary /// sized inline asm blocks for testing purposes. -unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, - const MCAsmInfo &MAI) const { +unsigned TargetInstrInfo::getInlineAsmLength( + const char *Str, + const MCAsmInfo &MAI, const TargetSubtargetInfo *STI) const { // Count the number of instructions in the asm. 
   bool AtInsnStart = true;
   unsigned Length = 0;
+  const unsigned MaxInstLength = MAI.getMaxInstLength(STI);
   for (; *Str; ++Str) {
     if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
                                 strlen(MAI.getSeparatorString())) == 0) {
@@ -102,7 +103,7 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
     }
 
     if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
-      unsigned AddLength = MAI.getMaxInstLength();
+      unsigned AddLength = MaxInstLength;
       if (strncmp(Str, ".space", 6) == 0) {
         char *EStr;
         int SpaceSize;
@@ -136,8 +137,14 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
   // Save off the debug loc before erasing the instruction.
   DebugLoc DL = Tail->getDebugLoc();
 
-  // Remove all the dead instructions from the end of MBB.
-  MBB->erase(Tail, MBB->end());
+  // Update call site info and remove all the dead instructions
+  // from the end of MBB.
+  while (Tail != MBB->end()) {
+    auto MI = Tail++;
+    if (MI->isCall())
+      MBB->getParent()->updateCallSiteInfo(&*MI);
+    MBB->erase(MI);
+  }
 
   // If MBB isn't immediately before NewDest, insert a branch to it.
   if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
@@ -162,9 +169,9 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
   assert(MI.getOperand(Idx1).isReg() && MI.getOperand(Idx2).isReg() &&
          "This only knows how to commute register operands so far");
 
-  unsigned Reg0 = HasDef ? MI.getOperand(0).getReg() : 0;
-  unsigned Reg1 = MI.getOperand(Idx1).getReg();
-  unsigned Reg2 = MI.getOperand(Idx2).getReg();
+  Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
+  Register Reg1 = MI.getOperand(Idx1).getReg();
+  Register Reg2 = MI.getOperand(Idx2).getReg();
   unsigned SubReg0 = HasDef ? MI.getOperand(0).getSubReg() : 0;
   unsigned SubReg1 = MI.getOperand(Idx1).getSubReg();
   unsigned SubReg2 = MI.getOperand(Idx2).getSubReg();
@@ -523,7 +530,8 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
 MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
                                                  ArrayRef<unsigned> Ops, int FI,
-                                                 LiveIntervals *LIS) const {
+                                                 LiveIntervals *LIS,
+                                                 VirtRegMap *VRM) const {
   auto Flags = MachineMemOperand::MONone;
   for (unsigned OpIdx : Ops)
     Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore
@@ -569,7 +577,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
     MBB->insert(MI, NewMI);
   } else {
     // Ask the target to do the actual folding.
-    NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS);
+    NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
   }
 
   if (NewMI) {
@@ -898,7 +906,8 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
     return true;
 
   // Avoid instructions obviously unsafe for remat.
-  if (MI.isNotDuplicable() || MI.mayStore() || MI.hasUnmodeledSideEffects())
+  if (MI.isNotDuplicable() || MI.mayStore() || MI.mayRaiseFPException() ||
+      MI.hasUnmodeledSideEffects())
     return false;
 
   // Don't remat inline asm. We have no idea how expensive it is
@@ -1010,7 +1019,7 @@ ScheduleHazardRecognizer *TargetInstrInfo::
 CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                const ScheduleDAG *DAG) const {
   return (ScheduleHazardRecognizer *)
-      new ScoreboardHazardRecognizer(II, DAG, "misched");
+      new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler");
 }
 
 // Default implementation of CreateTargetPostRAHazardRecognizer.
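A note on the getInlineAsmLength() change above: the estimate charges MaxInstLength bytes per assembly statement, except for a ".space N" directive, which is charged exactly N bytes. The following standalone sketch reproduces that counting idea under simplifying assumptions (a fixed ';' statement separator and no comment handling; estimateAsmLength is an illustrative name, not the LLVM API):

    #include <cctype>
    #include <cstdlib>
    #include <cstring>

    // Upper-bound size estimate for an inline-asm string: each statement
    // costs MaxInstLength bytes, except ".space N" which costs exactly N.
    unsigned estimateAsmLength(const char *Str, unsigned MaxInstLength) {
      bool AtInsnStart = true;
      unsigned Length = 0;
      for (; *Str; ++Str) {
        if (*Str == '\n' || *Str == ';') { // assumed statement separators
          AtInsnStart = true;
          continue;
        }
        if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
          unsigned AddLength = MaxInstLength;
          if (std::strncmp(Str, ".space", 6) == 0) {
            char *End;
            long SpaceSize = std::strtol(Str + 6, &End, 10);
            if (End != Str + 6 && SpaceSize > 0)
              AddLength = static_cast<unsigned>(SpaceSize);
          }
          Length += AddLength;
          AtInsnStart = false; // rest of this statement is already counted
        }
      }
      return Length;
    }

With MaxInstLength = 4, the string "add x0, x0, x1; .space 16" would be estimated at 4 + 16 = 20 bytes.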
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index e86190375642..9b28c1a6c450 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -1,9 +1,8 @@ //===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -74,8 +73,8 @@ static cl::opt<unsigned> MinimumJumpTableEntries cl::desc("Set minimum number of entries to use a jump table.")); static cl::opt<unsigned> MaximumJumpTableSize - ("max-jump-table-size", cl::init(0), cl::Hidden, - cl::desc("Set maximum size of jump tables; zero for no limit.")); + ("max-jump-table-size", cl::init(UINT_MAX), cl::Hidden, + cl::desc("Set maximum size of jump tables.")); /// Minimum jump table density for normal functions. static cl::opt<unsigned> @@ -124,6 +123,34 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC) setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C); + // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf". + if (TT.getArch() == Triple::ppc || TT.isPPC64()) { + setLibcallName(RTLIB::ADD_F128, "__addkf3"); + setLibcallName(RTLIB::SUB_F128, "__subkf3"); + setLibcallName(RTLIB::MUL_F128, "__mulkf3"); + setLibcallName(RTLIB::DIV_F128, "__divkf3"); + setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2"); + setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2"); + setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2"); + setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2"); + setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi"); + setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi"); + setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi"); + setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi"); + setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf"); + setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf"); + setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf"); + setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf"); + setLibcallName(RTLIB::OEQ_F128, "__eqkf2"); + setLibcallName(RTLIB::UNE_F128, "__nekf2"); + setLibcallName(RTLIB::OGE_F128, "__gekf2"); + setLibcallName(RTLIB::OLT_F128, "__ltkf2"); + setLibcallName(RTLIB::OLE_F128, "__lekf2"); + setLibcallName(RTLIB::OGT_F128, "__gtkf2"); + setLibcallName(RTLIB::UO_F128, "__unordkf2"); + setLibcallName(RTLIB::O_F128, "__unordkf2"); + } + // A few names are different on particular architectures or environments. 
if (TT.isOSDarwin()) { // For f16/f32 conversions, Darwin uses the standard naming scheme, instead @@ -546,7 +573,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { JumpIsExpensive = JumpIsExpensiveOverride; PredictableSelectIsExpensive = false; EnableExtLdPromotion = false; - HasFloatingPointExceptions = true; StackPointerRegisterToSaveRestore = 0; BooleanContents = UndefinedBooleanContent; BooleanFloatContents = UndefinedBooleanContent; @@ -583,6 +609,14 @@ void TargetLoweringBase::initActions() { std::fill(std::begin(TargetDAGCombineArray), std::end(TargetDAGCombineArray), 0); + for (MVT VT : MVT::fp_valuetypes()) { + MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); + if (IntVT.isValid()) { + setOperationAction(ISD::ATOMIC_SWAP, VT, Promote); + AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT); + } + } + // Set default actions for various operations. for (MVT VT : MVT::all_valuetypes()) { // Default all indexed load / store to expand. @@ -617,6 +651,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SSUBSAT, VT, Expand); setOperationAction(ISD::USUBSAT, VT, Expand); setOperationAction(ISD::SMULFIX, VT, Expand); + setOperationAction(ISD::SMULFIXSAT, VT, Expand); + setOperationAction(ISD::UMULFIX, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); @@ -655,8 +691,51 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); } + // Constrained floating-point operations default to expand. + setOperationAction(ISD::STRICT_FADD, VT, Expand); + setOperationAction(ISD::STRICT_FSUB, VT, Expand); + setOperationAction(ISD::STRICT_FMUL, VT, Expand); + setOperationAction(ISD::STRICT_FDIV, VT, Expand); + setOperationAction(ISD::STRICT_FREM, VT, Expand); + setOperationAction(ISD::STRICT_FMA, VT, Expand); + setOperationAction(ISD::STRICT_FSQRT, VT, Expand); + setOperationAction(ISD::STRICT_FPOW, VT, Expand); + setOperationAction(ISD::STRICT_FPOWI, VT, Expand); + setOperationAction(ISD::STRICT_FSIN, VT, Expand); + setOperationAction(ISD::STRICT_FCOS, VT, Expand); + setOperationAction(ISD::STRICT_FEXP, VT, Expand); + setOperationAction(ISD::STRICT_FEXP2, VT, Expand); + setOperationAction(ISD::STRICT_FLOG, VT, Expand); + setOperationAction(ISD::STRICT_FLOG10, VT, Expand); + setOperationAction(ISD::STRICT_FLOG2, VT, Expand); + setOperationAction(ISD::STRICT_FRINT, VT, Expand); + setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand); + setOperationAction(ISD::STRICT_FCEIL, VT, Expand); + setOperationAction(ISD::STRICT_FFLOOR, VT, Expand); + setOperationAction(ISD::STRICT_FROUND, VT, Expand); + setOperationAction(ISD::STRICT_FTRUNC, VT, Expand); + setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand); + setOperationAction(ISD::STRICT_FMINNUM, VT, Expand); + setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand); + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand); + // For most targets @llvm.get.dynamic.area.offset just returns 0. setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); + + // Vector reduction default to expand. 
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Expand); + setOperationAction(ISD::VECREDUCE_FMUL, VT, Expand); + setOperationAction(ISD::VECREDUCE_ADD, VT, Expand); + setOperationAction(ISD::VECREDUCE_MUL, VT, Expand); + setOperationAction(ISD::VECREDUCE_AND, VT, Expand); + setOperationAction(ISD::VECREDUCE_OR, VT, Expand); + setOperationAction(ISD::VECREDUCE_XOR, VT, Expand); + setOperationAction(ISD::VECREDUCE_SMAX, VT, Expand); + setOperationAction(ISD::VECREDUCE_SMIN, VT, Expand); + setOperationAction(ISD::VECREDUCE_UMAX, VT, Expand); + setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand); + setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand); + setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand); } // Most targets ignore the @llvm.prefetch intrinsic. @@ -688,6 +767,10 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FROUND, VT, Expand); + setOperationAction(ISD::LROUND, VT, Expand); + setOperationAction(ISD::LLROUND, VT, Expand); + setOperationAction(ISD::LRINT, VT, Expand); + setOperationAction(ISD::LLRINT, VT, Expand); } // Default ISD::TRAP to expand (which turns it into abort). @@ -700,7 +783,7 @@ void TargetLoweringBase::initActions() { MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL, EVT) const { - return MVT::getIntegerVT(8 * DL.getPointerSize(0)); + return MVT::getIntegerVT(DL.getPointerSizeInBits(0)); } EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL, @@ -985,16 +1068,16 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, // Add a new memory operand for this FI. assert(MFI.getObjectOffset(FI) != -1); - auto Flags = MachineMemOperand::MOLoad; - if (MI->getOpcode() == TargetOpcode::STATEPOINT) { - Flags |= MachineMemOperand::MOStore; - Flags |= MachineMemOperand::MOVolatile; + // Note: STATEPOINT MMOs are added during SelectionDAG. STACKMAP, and + // PATCHPOINT should be updated to do the same. (TODO) + if (MI->getOpcode() != TargetOpcode::STATEPOINT) { + auto Flags = MachineMemOperand::MOLoad; + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), Flags, + MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI)); + MIB->addMemOperand(MF, MMO); } - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, FI), Flags, - MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI)); - MIB->addMemOperand(MF, MMO); - + // Replace the instruction and update the operand index. MBB->insert(MachineBasicBlock::iterator(MI), MIB); OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1; @@ -1393,7 +1476,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0)); + Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0)); } } @@ -1409,6 +1492,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, unsigned Alignment, + MachineMemOperand::Flags Flags, bool *Fast) const { // Check if the specified alignment is sufficient based on the data layout. // TODO: While using the data layout works in practice, a better solution @@ -1424,7 +1508,15 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, } // This is a misaligned access. 
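+  // The MachineMemOperand flags are forwarded so that a target can, for
+  // example, treat volatile or atomic accesses differently from plain loads
+  // and stores when making this decision.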
- return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast); + return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast); +} + +bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, + const DataLayout &DL, EVT VT, + const MachineMemOperand &MMO, + bool *Fast) const { + return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), + MMO.getAlignment(), MMO.getFlags(), Fast); } BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const { @@ -1447,6 +1539,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case Switch: return 0; case IndirectBr: return 0; case Invoke: return 0; + case CallBr: return 0; case Resume: return 0; case Unreachable: return 0; case CleanupRet: return 0; @@ -1580,8 +1673,8 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const { // thread's unsafe stack pointer. Module *M = IRB.GetInsertBlock()->getParent()->getParent(); Type *StackPtrTy = Type::getInt8PtrTy(M->getContext()); - Value *Fn = M->getOrInsertFunction("__safestack_pointer_address", - StackPtrTy->getPointerTo(0)); + FunctionCallee Fn = M->getOrInsertFunction("__safestack_pointer_address", + StackPtrTy->getPointerTo(0)); return IRB.CreateCall(Fn); } @@ -1656,7 +1749,7 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { return M.getNamedValue("__stack_chk_guard"); } -Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { +Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { return nullptr; } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index cb2fe691d702..4c8f75b237aa 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -219,6 +218,16 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, PersonalityEncoding = dwarf::DW_EH_PE_absptr; TTypeEncoding = dwarf::DW_EH_PE_absptr; } + CallSiteEncoding = dwarf::DW_EH_PE_udata4; + break; + case Triple::riscv32: + case Triple::riscv64: + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + CallSiteEncoding = dwarf::DW_EH_PE_udata4; break; case Triple::sparcv9: LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; @@ -272,6 +281,19 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, } } + if (NamedMDNode *DependentLibraries = M.getNamedMetadata("llvm.dependent-libraries")) { + auto *S = C.getELFSection(".deplibs", ELF::SHT_LLVM_DEPENDENT_LIBRARIES, + ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, ""); + + Streamer.SwitchSection(S); + + for (const auto &Operand : DependentLibraries->operands()) { + Streamer.EmitBytes( + cast<MDString>(cast<MDNode>(Operand)->getOperand(0))->getString()); + Streamer.EmitIntValue(0, 1); + } + } + unsigned Version = 0; unsigned Flags = 0; StringRef Section; @@ -1458,7 +1480,7 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFile::Initialize(Ctx, TM); const Triple &T = TM.getTargetTriple(); - if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { + if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { StaticCtorSection = Ctx.getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, @@ -1484,7 +1506,7 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx, unsigned Priority, const MCSymbol *KeySym, MCSectionCOFF *Default) { - if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { + if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { // If the priority is the default, use .CRT$XCU, possibly associative. 
if (Priority == 65535) return Ctx.getAssociativeCOFFSection(Default, KeySym, 0); @@ -1544,9 +1566,7 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( const GlobalValue *LHS, const GlobalValue *RHS, const TargetMachine &TM) const { const Triple &T = TM.getTargetTriple(); - if (!T.isKnownWindowsMSVCEnvironment() && - !T.isWindowsItaniumEnvironment() && - !T.isWindowsCoreCLREnvironment()) + if (T.isOSCygMing()) return nullptr; // Our symbols should exist in address space zero, cowardly no-op if @@ -1694,8 +1714,11 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( Group = C->getName(); } - return getContext().getWasmSection(Name, Kind, Group, - MCContext::GenericSectionID); + MCSectionWasm* Section = + getContext().getWasmSection(Name, Kind, Group, + MCContext::GenericSectionID); + + return Section; } static MCSectionWasm *selectWasmSectionForGlobal( @@ -1724,6 +1747,7 @@ static MCSectionWasm *selectWasmSectionForGlobal( UniqueID = *NextUniqueID; (*NextUniqueID)++; } + return Ctx.getWasmSection(Name, Kind, Group, UniqueID); } diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index 3c133fb8594e..039748d817ca 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -1,9 +1,8 @@ //===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 28126fcf766d..36df02692f86 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -1,9 +1,8 @@ //===- TargetPassConfig.cpp - Target independent code generation passes ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -23,6 +22,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/CSEConfigBase.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/Passes.h" @@ -408,7 +408,7 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm) TM.Options.EnableIPRA = EnableIPRA; else { // If not explicitly specified, use target default. - TM.Options.EnableIPRA = TM.useIPRA(); + TM.Options.EnableIPRA |= TM.useIPRA(); } if (TM.Options.EnableIPRA) @@ -646,7 +646,7 @@ void TargetPassConfig::addIRPasses() { // into optimally-sized loads and compares. The transforms are enabled by a // target lowering hook. 
     if (!DisableMergeICmps)
-      addPass(createMergeICmpsPass());
+      addPass(createMergeICmpsLegacyPass());
     addPass(createExpandMemCmpPass());
   }
 
@@ -815,6 +815,13 @@ bool TargetPassConfig::addCoreISelPasses() {
   } else if (addInstSelector())
     return true;
 
+  // Expand pseudo-instructions emitted by ISel. Don't run the verifier before
+  // FinalizeISel.
+  addPass(&FinalizeISelID);
+
+  // Print the instruction selected machine code...
+  printAndVerify("After Instruction Selection");
+
   return false;
 }
 
@@ -874,12 +881,6 @@ void TargetPassConfig::addMachinePasses() {
     }
   }
 
-  // Print the instruction selected machine code...
-  printAndVerify("After Instruction Selection");
-
-  // Expand pseudo-instructions emitted by ISel.
-  addPass(&ExpandISelPseudosID);
-
   // Add passes that optimize machine instructions in SSA form.
   if (getOptLevel() != CodeGenOpt::None) {
     addMachineSSAOptimization();
@@ -898,13 +899,9 @@ void TargetPassConfig::addMachinePasses() {
   // Run register allocation and passes that are tightly coupled with it,
   // including phi elimination and scheduling.
   if (getOptimizeRegAlloc())
-    addOptimizedRegAlloc(createRegAllocPass(true));
-  else {
-    if (RegAlloc != &useDefaultRegisterAllocator &&
-        RegAlloc != &createFastRegisterAllocator)
-      report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
-    addFastRegAlloc(createRegAllocPass(false));
-  }
+    addOptimizedRegAlloc();
+  else
+    addFastRegAlloc();
 
   // Run post-ra passes.
   addPostRegAlloc();
@@ -1039,10 +1036,6 @@ bool TargetPassConfig::getOptimizeRegAlloc() const {
   llvm_unreachable("Invalid optimize-regalloc state");
 }
 
-/// RegisterRegAlloc's global Registry tracks allocator registration.
-MachinePassRegistry<RegisterRegAlloc::FunctionPassCtor>
-    RegisterRegAlloc::Registry;
-
 /// A dummy default pass factory indicates whether the register allocator is
 /// overridden on the command line.
 static llvm::once_flag InitializeDefaultRegisterAllocatorFlag;
@@ -1053,12 +1046,8 @@ defaultRegAlloc("default",
                 useDefaultRegisterAllocator);
 
 static void initializeDefaultRegisterAllocatorOnce() {
-  RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
-
-  if (!Ctor) {
-    Ctor = RegAlloc;
+  if (!RegisterRegAlloc::getDefault())
     RegisterRegAlloc::setDefault(RegAlloc);
-  }
 }
 
 /// Instantiate the default register allocator pass for this target for either
@@ -1098,6 +1087,33 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
   return createTargetRegisterAllocator(Optimized);
 }
 
+bool TargetPassConfig::addRegAssignmentFast() {
+  if (RegAlloc != &useDefaultRegisterAllocator &&
+      RegAlloc != &createFastRegisterAllocator)
+    report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
+
+  addPass(createRegAllocPass(false));
+  return true;
+}
+
+bool TargetPassConfig::addRegAssignmentOptimized() {
+  // Add the selected register allocation pass.
+  addPass(createRegAllocPass(true));
+
+  // Allow targets to change the register assignments before rewriting.
+  addPreRewrite();
+
+  // Finally rewrite virtual registers.
+  addPass(&VirtRegRewriterID);
+  // Perform stack slot coloring and post-ra machine LICM.
+  //
+  // FIXME: Re-enable coloring with register when it's capable of adding
+  // kill markers.
+  addPass(&StackSlotColoringID);
+
+  return true;
+}
+
 /// Return true if the default global register allocator is in use and
 /// has not been overridden on the command line with '-regalloc=...'
bool TargetPassConfig::usingDefaultRegAlloc() const { @@ -1106,18 +1122,17 @@ bool TargetPassConfig::usingDefaultRegAlloc() const { /// Add the minimum set of target-independent passes that are required for /// register allocation. No coalescing or scheduling. -void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { +void TargetPassConfig::addFastRegAlloc() { addPass(&PHIEliminationID, false); addPass(&TwoAddressInstructionPassID, false); - if (RegAllocPass) - addPass(RegAllocPass); + addRegAssignmentFast(); } /// Add standard target-independent passes that are tightly coupled with /// optimized register allocation, including coalescing, machine instruction /// scheduling, and register allocation itself. -void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { +void TargetPassConfig::addOptimizedRegAlloc() { addPass(&DetectDeadLanesID, false); addPass(&ProcessImplicitDefsID, false); @@ -1149,21 +1164,10 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // PreRA instruction scheduling. addPass(&MachineSchedulerID); - if (RegAllocPass) { - // Add the selected register allocation pass. - addPass(RegAllocPass); - - // Allow targets to change the register assignments before rewriting. - addPreRewrite(); - - // Finally rewrite virtual registers. - addPass(&VirtRegRewriterID); - - // Perform stack slot coloring and post-ra machine LICM. - // - // FIXME: Re-enable coloring with register when it's capable of adding - // kill markers. - addPass(&StackSlotColoringID); + if (addRegAssignmentOptimized()) { + // Allow targets to expand pseudo instructions depending on the choice of + // registers before MachineCopyPropagation. + addPostRewrite(); // Copy propagate to forward register uses and try to eliminate COPYs that // were not coalesced. @@ -1221,3 +1225,11 @@ bool TargetPassConfig::isGlobalISelAbortEnabled() const { bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const { return TM->Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag; } + +bool TargetPassConfig::isGISelCSEEnabled() const { + return true; +} + +std::unique_ptr<CSEConfigBase> TargetPassConfig::getCSEConfig() const { + return make_unique<CSEConfigBase>(); +} diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 661dc18f7a85..f1b2ecf3243b 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -1,9 +1,8 @@ //==- TargetRegisterInfo.cpp - Target Register Information Implementation --==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -14,6 +13,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -398,6 +398,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI = MRI.getRegAllocationHints(VirtReg); + SmallSet<unsigned, 32> HintedRegs; // First hint may be a target hint. 
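+  // (Hints_MRI.first is the hint type; when it is nonzero, the first register
+  // in the vector is a target-specific hint, which this generic path skips.)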
bool Skip = (Hints_MRI.first != 0); for (auto Reg : Hints_MRI.second) { @@ -411,6 +412,10 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, if (VRM && isVirtualRegister(Phys)) Phys = VRM->getPhys(Phys); + // Don't add the same reg twice (Hints_MRI may contain multiple virtual + // registers allocated to the same physreg). + if (!HintedRegs.insert(Phys).second) + continue; // Check that Phys is a valid hint in VirtReg's register class. if (!isPhysicalRegister(Phys)) continue; diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 3cff31ad4933..195279719ad4 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -1,9 +1,8 @@ //===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp index fa29c05fd6c2..59eb2f9c88cb 100644 --- a/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/lib/CodeGen/TargetSubtargetInfo.cpp @@ -1,9 +1,8 @@ //===- TargetSubtargetInfo.cpp - General Target Information ----------------==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -12,24 +11,16 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/ADT/Optional.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include <string> using namespace llvm; TargetSubtargetInfo::TargetSubtargetInfo( const Triple &TT, StringRef CPU, StringRef FS, - ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD, - const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, + ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD, + const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, const unsigned *FP) - : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) { + : MCSubtargetInfo(TT, CPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) { } TargetSubtargetInfo::~TargetSubtargetInfo() = default; @@ -67,50 +58,4 @@ bool TargetSubtargetInfo::useAA() const { return false; } -static std::string createSchedInfoStr(unsigned Latency, double RThroughput) { - static const char *SchedPrefix = " sched: ["; - std::string Comment; - raw_string_ostream CS(Comment); - if (RThroughput != 0.0) - CS << SchedPrefix << Latency << format(":%2.2f", RThroughput) - << "]"; - else - CS << SchedPrefix << Latency << ":?]"; - CS.flush(); - return Comment; -} - -/// Returns string representation of scheduler comment -std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const { - if (MI.isPseudo() || MI.isTerminator()) - return std::string(); - // We don't cache TSchedModel because it depends on TargetInstrInfo - // that could be changed during the compilation - TargetSchedModel TSchedModel; - TSchedModel.init(this); - unsigned Latency = TSchedModel.computeInstrLatency(&MI); - double RThroughput = TSchedModel.computeReciprocalThroughput(&MI); - return createSchedInfoStr(Latency, RThroughput); -} - -/// Returns string representation of scheduler comment -std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const { - // We don't cache TSchedModel because it depends on TargetInstrInfo - // that could be changed during the compilation - TargetSchedModel TSchedModel; - TSchedModel.init(this); - unsigned Latency; - if (TSchedModel.hasInstrSchedModel()) - Latency = TSchedModel.computeInstrLatency(MCI); - else if (TSchedModel.hasInstrItineraries()) { - auto *ItinData = TSchedModel.getInstrItineraries(); - Latency = ItinData->getStageLatency( - getInstrInfo()->get(MCI.getOpcode()).getSchedClass()); - } else - return std::string(); - double RThroughput = TSchedModel.computeReciprocalThroughput(MCI); - return createSchedInfoStr(Latency, RThroughput); -} - -void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { -} +void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 4b72f6a84ca1..43d876646967 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1,9 +1,8 @@ //===- TwoAddressInstructionPass.cpp - Two-Address instruction pass -------===// // -// The LLVM Compiler Infrastructure 
-// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -1245,8 +1244,13 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, ++NumAggrCommuted; // There might be more than two commutable operands, update BaseOp and // continue scanning. + // FIXME: This assumes that the new instruction's operands are in the + // same positions and were simply swapped. BaseOpReg = OtherOpReg; BaseOpKilled = OtherOpKilled; + // Resamples OpsNum in case the number of operands was reduced. This + // happens with X86. + OpsNum = MI->getDesc().getNumOperands(); continue; } // If this was a commute based on kill, we won't do better continuing. diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 5288ca672774..177bab32bccc 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -1,9 +1,8 @@ //===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -38,43 +37,13 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; -static bool eliminateUnreachableBlock(Function &F) { - df_iterator_default_set<BasicBlock*> Reachable; - - // Mark all reachable blocks. - for (BasicBlock *BB : depth_first_ext(&F, Reachable)) - (void)BB/* Mark all reachable blocks */; - - // Loop over all dead blocks, remembering them and deleting all instructions - // in them. - std::vector<BasicBlock*> DeadBlocks; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - if (!Reachable.count(&*I)) { - BasicBlock *BB = &*I; - DeadBlocks.push_back(BB); - while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { - PN->replaceAllUsesWith(Constant::getNullValue(PN->getType())); - BB->getInstList().pop_front(); - } - for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) - (*SI)->removePredecessor(BB); - BB->dropAllReferences(); - } - - // Actually remove the blocks now. 
- for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { - DeadBlocks[i]->eraseFromParent(); - } - - return !DeadBlocks.empty(); -} - namespace { class UnreachableBlockElimLegacyPass : public FunctionPass { bool runOnFunction(Function &F) override { - return eliminateUnreachableBlock(F); + return llvm::EliminateUnreachableBlocks(F); } public: @@ -99,7 +68,7 @@ FunctionPass *llvm::createUnreachableBlockEliminationPass() { PreservedAnalyses UnreachableBlockElimPass::run(Function &F, FunctionAnalysisManager &AM) { - bool Changed = eliminateUnreachableBlock(F); + bool Changed = llvm::EliminateUnreachableBlocks(F); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/lib/CodeGen/ValueTypes.cpp b/lib/CodeGen/ValueTypes.cpp index adb7075de651..a911cdcbec9d 100644 --- a/lib/CodeGen/ValueTypes.cpp +++ b/lib/CodeGen/ValueTypes.cpp @@ -1,9 +1,8 @@ //===----------- ValueTypes.cpp - Implementation of EVT methods -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -166,11 +165,18 @@ std::string EVT::getEVTString() const { case MVT::v128i16: return "v128i16"; case MVT::v1i32: return "v1i32"; case MVT::v2i32: return "v2i32"; + case MVT::v3i32: return "v3i32"; case MVT::v4i32: return "v4i32"; + case MVT::v5i32: return "v5i32"; case MVT::v8i32: return "v8i32"; case MVT::v16i32: return "v16i32"; case MVT::v32i32: return "v32i32"; case MVT::v64i32: return "v64i32"; + case MVT::v128i32: return "v128i32"; + case MVT::v256i32: return "v256i32"; + case MVT::v512i32: return "v512i32"; + case MVT::v1024i32:return "v1024i32"; + case MVT::v2048i32:return "v2048i32"; case MVT::v1i64: return "v1i64"; case MVT::v2i64: return "v2i64"; case MVT::v4i64: return "v4i64"; @@ -183,16 +189,25 @@ std::string EVT::getEVTString() const { case MVT::v2f16: return "v2f16"; case MVT::v4f16: return "v4f16"; case MVT::v8f16: return "v8f16"; + case MVT::v3f32: return "v3f32"; case MVT::v4f32: return "v4f32"; + case MVT::v5f32: return "v5f32"; case MVT::v8f32: return "v8f32"; case MVT::v16f32: return "v16f32"; + case MVT::v32f32: return "v32f32"; + case MVT::v64f32: return "v64f32"; + case MVT::v128f32: return "v128f32"; + case MVT::v256f32: return "v256f32"; + case MVT::v512f32: return "v512f32"; + case MVT::v1024f32:return "v1024f32"; + case MVT::v2048f32:return "v2048f32"; case MVT::v1f64: return "v1f64"; case MVT::v2f64: return "v2f64"; case MVT::v4f64: return "v4f64"; case MVT::v8f64: return "v8f64"; case MVT::Metadata:return "Metadata"; case MVT::Untyped: return "Untyped"; - case MVT::ExceptRef: return "ExceptRef"; + case MVT::exnref : return "exnref"; } } @@ -247,11 +262,18 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v128i16: return VectorType::get(Type::getInt16Ty(Context), 128); case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1); case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2); + case MVT::v3i32: return VectorType::get(Type::getInt32Ty(Context), 3); case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4); + case MVT::v5i32: return VectorType::get(Type::getInt32Ty(Context), 5); case MVT::v8i32: return 
VectorType::get(Type::getInt32Ty(Context), 8); case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16); case MVT::v32i32: return VectorType::get(Type::getInt32Ty(Context), 32); case MVT::v64i32: return VectorType::get(Type::getInt32Ty(Context), 64); + case MVT::v128i32: return VectorType::get(Type::getInt32Ty(Context), 128); + case MVT::v256i32: return VectorType::get(Type::getInt32Ty(Context), 256); + case MVT::v512i32: return VectorType::get(Type::getInt32Ty(Context), 512); + case MVT::v1024i32:return VectorType::get(Type::getInt32Ty(Context), 1024); + case MVT::v2048i32:return VectorType::get(Type::getInt32Ty(Context), 2048); case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1); case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2); case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4); @@ -264,9 +286,18 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8); case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1); case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); + case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3); case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); + case MVT::v5f32: return VectorType::get(Type::getFloatTy(Context), 5); case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8); - case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16); + case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16); + case MVT::v32f32: return VectorType::get(Type::getFloatTy(Context), 32); + case MVT::v64f32: return VectorType::get(Type::getFloatTy(Context), 64); + case MVT::v128f32: return VectorType::get(Type::getFloatTy(Context), 128); + case MVT::v256f32: return VectorType::get(Type::getFloatTy(Context), 256); + case MVT::v512f32: return VectorType::get(Type::getFloatTy(Context), 512); + case MVT::v1024f32:return VectorType::get(Type::getFloatTy(Context), 1024); + case MVT::v2048f32:return VectorType::get(Type::getFloatTy(Context), 2048); case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1); case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2); case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index ed7bef667e77..4a06704a8876 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -1,9 +1,8 @@ //===- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -385,7 +384,7 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const { // give us additional liveness information: The target (super-)register // must not be valid before this point. Replace the COPY with a KILL // instruction to maintain this information. 
-  if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) {
+  if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
     MI.setDesc(TII->get(TargetOpcode::KILL));
     LLVM_DEBUG(dbgs() << "  replace by: " << MI);
     return;
diff --git a/lib/CodeGen/WasmEHPrepare.cpp b/lib/CodeGen/WasmEHPrepare.cpp
index e5002eb95346..865a1cfbf43a 100644
--- a/lib/CodeGen/WasmEHPrepare.cpp
+++ b/lib/CodeGen/WasmEHPrepare.cpp
@@ -1,14 +1,14 @@
 //===-- WasmEHPrepare - Prepare excepton handling for WebAssembly --------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This transformation is designed for use by code generators which use
-// WebAssembly exception handling scheme.
+// WebAssembly exception handling scheme. This currently supports C++
+// exceptions.
 //
 // WebAssembly exception handling uses Windows exception IR for the middle level
 // representation. This pass does the following transformation for every
@@ -23,53 +23,20 @@
 //
 // - After:
 //   catchpad ...
-//   exn = wasm.catch(0); // 0 is a tag for C++
-//   wasm.landingpad.index(index);
+//   exn = wasm.extract.exception();
+//   // Only add below in case it's not a single catch (...)
+//   wasm.landingpad.index(index);
 //   __wasm_lpad_context.lpad_index = index;
 //   __wasm_lpad_context.lsda = wasm.lsda();
 //   _Unwind_CallPersonality(exn);
-//   int selector = __wasm.landingpad_context.selector;
+//   selector = __wasm.landingpad_context.selector;
 //   ...
 //
-// Also, does the following for a cleanuppad block with a call to
-// __clang_call_terminate():
-// - Before:
-//   cleanuppad ...
-//   exn = wasm.get.exception();
-//   __clang_call_terminate(exn);
-//
-// - After:
-//   cleanuppad ...
-//   exn = wasm.catch(0); // 0 is a tag for C++
-//   __clang_call_terminate(exn);
-//
-//
-// * Background: WebAssembly EH instructions
-// WebAssembly's try and catch instructions are structured as follows:
-// try
-//   instruction*
-// catch (C++ tag)
-//   instruction*
-// ...
-// catch_all
-//   instruction*
-// try_end
-//
-// A catch instruction in WebAssembly does not correspond to a C++ catch clause.
-// In WebAssembly, there is a single catch instruction for all C++ exceptions.
-// There can be more catch instructions for exceptions in other languages, but
-// they are not generated for now. catch_all catches all exceptions including
-// foreign exceptions (e.g. JavaScript). We turn catchpads into catch (C++ tag)
-// and cleanuppads into catch_all, with one exception: cleanuppad with a call to
-// __clang_call_terminate should be both in catch (C++ tag) and catch_all.
-//
 //
 // * Background: Direct personality function call
 // In WebAssembly EH, the VM is responsible for unwinding the stack once an
 // exception is thrown. After the stack is unwound, the control flow is
-// transfered to WebAssembly 'catch' instruction, which returns a caught
-// exception object.
+// transfered to WebAssembly 'catch' instruction.
 //
 // Unwinding the stack is not done by libunwind but the VM, so the personality
 // function in libcxxabi cannot be called from libunwind during the unwinding
@@ -137,19 +104,19 @@ class WasmEHPrepare : public FunctionPass {
   Value *LSDAField = nullptr;     // lsda field
   Value *SelectorField = nullptr; // selector
 
-  Function *ThrowF = nullptr;           // wasm.throw() intrinsic
-  Function *CatchF = nullptr;           // wasm.catch.extract() intrinsic
-  Function *LPadIndexF = nullptr;       // wasm.landingpad.index() intrinsic
-  Function *LSDAF = nullptr;            // wasm.lsda() intrinsic
-  Function *GetExnF = nullptr;          // wasm.get.exception() intrinsic
-  Function *GetSelectorF = nullptr;     // wasm.get.ehselector() intrinsic
-  Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper
-  Function *ClangCallTermF = nullptr;   // __clang_call_terminate() function
+  Function *ThrowF = nullptr;       // wasm.throw() intrinsic
+  Function *LPadIndexF = nullptr;   // wasm.landingpad.index() intrinsic
+  Function *LSDAF = nullptr;        // wasm.lsda() intrinsic
+  Function *GetExnF = nullptr;      // wasm.get.exception() intrinsic
+  Function *ExtractExnF = nullptr;  // wasm.extract.exception() intrinsic
+  Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic
+  FunctionCallee CallPersonalityF =
+      nullptr; // _Unwind_CallPersonality() wrapper
 
   bool prepareEHPads(Function &F);
   bool prepareThrows(Function &F);
-  void prepareEHPad(BasicBlock *BB, unsigned Index);
+  void prepareEHPad(BasicBlock *BB, bool NeedLSDA, unsigned Index = 0);
   void prepareTerminateCleanupPad(BasicBlock *BB);
 
 public:
@@ -209,14 +176,12 @@ bool WasmEHPrepare::prepareThrows(Function &F) {
   // wasm.throw() intinsic, which will be lowered to wasm 'throw' instruction.
   ThrowF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_throw);
-
   // Insert an unreachable instruction after a call to @llvm.wasm.throw and
   // delete all following instructions within the BB, and delete all the dead
   // children of the BB as well.
   for (User *U : ThrowF->users()) {
-    // A call to @llvm.wasm.throw() is only generated from
-    // __builtin_wasm_throw() builtin call within libcxxabi, and cannot be an
-    // InvokeInst.
+    // A call to @llvm.wasm.throw() is only generated from __cxa_throw()
+    // builtin call within libcxxabi, and cannot be an InvokeInst.
     auto *ThrowI = cast<CallInst>(U);
     if (ThrowI->getFunction() != &F)
       continue;
@@ -263,8 +228,6 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
   SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2,
                                          "selector_gep");
 
-  // wasm.catch() intinsic, which will be lowered to wasm 'catch' instruction.
-  CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
   // wasm.landingpad.index() intrinsic, which is to specify landingpad index
   LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index);
   // wasm.lsda() intrinsic. Returns the address of LSDA table for the current
@@ -275,14 +238,18 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
   GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception);
   GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector);
 
-  // _Unwind_CallPersonality() wrapper function, which calls the personality
-  CallPersonalityF = cast<Function>(M.getOrInsertFunction(
-      "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy()));
-  CallPersonalityF->setDoesNotThrow();
+  // wasm.extract.exception() is the same as wasm.get.exception() but it does
+  // not take a token argument. This will be lowered down to EXTRACT_EXCEPTION
+  // pseudo instruction in instruction selection, which will be expanded using
+  // 'br_on_exn' instruction later.
+  ExtractExnF =
+      Intrinsic::getDeclaration(&M, Intrinsic::wasm_extract_exception);
 
-  // __clang_call_terminate() function, which is inserted by clang in case a
-  // cleanup throws
-  ClangCallTermF = M.getFunction("__clang_call_terminate");
+  // _Unwind_CallPersonality() wrapper function, which calls the personality
+  CallPersonalityF = M.getOrInsertFunction(
+      "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy());
+  if (Function *F = dyn_cast<Function>(CallPersonalityF.getCallee()))
+    F->setDoesNotThrow();
 
   unsigned Index = 0;
   for (auto *BB : CatchPads) {
@@ -290,60 +257,52 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
     auto *CPI = cast<CatchPadInst>(BB->getFirstNonPHI());
     // In case of a single catch (...), we don't need to emit LSDA
     if (CPI->getNumArgOperands() == 1 &&
        cast<Constant>(CPI->getArgOperand(0))->isNullValue())
-      prepareEHPad(BB, -1);
+      prepareEHPad(BB, false);
     else
-      prepareEHPad(BB, Index++);
+      prepareEHPad(BB, true, Index++);
   }
 
-  if (!ClangCallTermF)
-    return !CatchPads.empty();
-
-  // Cleanuppads will turn into catch_all later, but cleanuppads with a call to
-  // __clang_call_terminate() is a special case. __clang_call_terminate() takes
-  // an exception object, so we have to duplicate call in both 'catch <C++ tag>'
-  // and 'catch_all' clauses. Here we only insert a call to catch; the
-  // duplication will be done later. In catch_all, the exception object will be
-  // set to null.
+  // Cleanup pads don't need LSDA.
   for (auto *BB : CleanupPads)
-    for (auto &I : *BB)
-      if (auto *CI = dyn_cast<CallInst>(&I))
-        if (CI->getCalledValue() == ClangCallTermF)
-          prepareEHPad(BB, -1);
+    prepareEHPad(BB, false);
 
   return true;
 }
 
-void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
+// Prepare an EH pad for Wasm EH handling. If NeedLSDA is false, Index is
+// ignored.
+void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedLSDA,
+                                 unsigned Index) {
   assert(BB->isEHPad() && "BB is not an EHPad!");
   IRBuilder<> IRB(BB->getContext());
-
   IRB.SetInsertPoint(&*BB->getFirstInsertionPt());
-  // The argument to wasm.catch() is the tag for C++ exceptions, which we set to
-  // 0 for this module.
-  // Pseudocode: void *exn = wasm.catch(0);
-  Instruction *Exn = IRB.CreateCall(CatchF, IRB.getInt32(0), "exn");
-  // Replace the return value of wasm.get.exception() with the return value from
-  // wasm.catch().
+
   auto *FPI = cast<FuncletPadInst>(BB->getFirstNonPHI());
   Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr;
   for (auto &U : FPI->uses()) {
    if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
      if (CI->getCalledValue() == GetExnF)
        GetExnCI = CI;
-      else if (CI->getCalledValue() == GetSelectorF)
+      if (CI->getCalledValue() == GetSelectorF)
        GetSelectorCI = CI;
    }
  }
 
-  assert(GetExnCI && "wasm.get.exception() call does not exist");
-  GetExnCI->replaceAllUsesWith(Exn);
+  // Cleanup pads w/o __clang_call_terminate call do not have any of
+  // wasm.get.exception() or wasm.get.ehselector() calls. We need to do nothing.
+  if (!GetExnCI) {
+    assert(!GetSelectorCI &&
+           "wasm.get.ehselector() cannot exist w/o wasm.get.exception()");
+    return;
+  }
+
+  Instruction *ExtractExnCI = IRB.CreateCall(ExtractExnF, {}, "exn");
+  GetExnCI->replaceAllUsesWith(ExtractExnCI);
   GetExnCI->eraseFromParent();
 
   // In case it is a catchpad with single catch (...) or a cleanuppad, we don't
   // need to call personality function because we don't need a selector.
-  if (FPI->getNumArgOperands() == 0 ||
-      (FPI->getNumArgOperands() == 1 &&
-       cast<Constant>(FPI->getArgOperand(0))->isNullValue())) {
+  if (!NeedLSDA) {
     if (GetSelectorCI) {
       assert(GetSelectorCI->use_empty() &&
              "wasm.get.ehselector() still has uses!");
@@ -351,7 +310,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
     }
     return;
   }
-  IRB.SetInsertPoint(Exn->getNextNode());
+  IRB.SetInsertPoint(ExtractExnCI->getNextNode());
 
   // This is to create a map of <landingpad EH label, landingpad index> in
   // SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables.
@@ -373,12 +332,13 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
   IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
 
   // Pseudocode: _Unwind_CallPersonality(exn);
-  CallInst *PersCI =
-      IRB.CreateCall(CallPersonalityF, Exn, OperandBundleDef("funclet", CPI));
+  CallInst *PersCI = IRB.CreateCall(CallPersonalityF, ExtractExnCI,
+                                    OperandBundleDef("funclet", CPI));
   PersCI->setDoesNotThrow();
 
   // Pseudocode: int selector = __wasm.landingpad_context.selector;
-  Instruction *Selector = IRB.CreateLoad(SelectorField, "selector");
+  Instruction *Selector =
+      IRB.CreateLoad(IRB.getInt32Ty(), SelectorField, "selector");
 
   // Replace the return value from wasm.get.ehselector() with the selector value
   // loaded from __wasm_lpad_context.selector.
@@ -388,15 +348,15 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
 }
 
 void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
+  // If an exception is not caught by a catchpad (i.e., it is a foreign
+  // exception), it will unwind to its parent catchswitch's unwind destination.
+  // We don't record an unwind destination for cleanuppads because every
+  // exception should be caught by it.
   for (const auto &BB : *F) {
     if (!BB.isEHPad())
       continue;
     const Instruction *Pad = BB.getFirstNonPHI();
 
-    // If an exception is not caught by a catchpad (i.e., it is a foreign
-    // exception), it will unwind to its parent catchswitch's unwind
-    // destination. We don't record an unwind destination for cleanuppads
-    // because every exception should be caught by it.
     if (const auto *CatchPad = dyn_cast<CatchPadInst>(Pad)) {
       const auto *UnwindBB = CatchPad->getCatchSwitch()->getUnwindDest();
       if (!UnwindBB)
@@ -409,22 +369,4 @@ void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
       EHInfo.setEHPadUnwindDest(&BB, UnwindBB);
     }
   }
-
-  // Record the unwind destination for invoke and cleanupret instructions.
-  for (const auto &BB : *F) {
-    const Instruction *TI = BB.getTerminator();
-    BasicBlock *UnwindBB = nullptr;
-    if (const auto *Invoke = dyn_cast<InvokeInst>(TI))
-      UnwindBB = Invoke->getUnwindDest();
-    else if (const auto *CleanupRet = dyn_cast<CleanupReturnInst>(TI))
-      UnwindBB = CleanupRet->getUnwindDest();
-    if (!UnwindBB)
-      continue;
-    const Instruction *UnwindPad = UnwindBB->getFirstNonPHI();
-    if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad))
-      // Currently there should be only one handler per a catchswitch.
-      EHInfo.setThrowUnwindDest(&BB, *CatchSwitch->handlers().begin());
-    else // cleanuppad
-      EHInfo.setThrowUnwindDest(&BB, UnwindBB);
-  }
 }
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 6a15240fa6e0..cdf79374e974 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -1,9 +1,8 @@
 //===-- WinEHPrepare - Prepare exception handling for code generation ---===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1080,7 +1079,8 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
     SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr,
                                Twine(PN->getName(), ".wineh.spillslot"),
                                &F.getEntryBlock().front());
-    Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
+    Value *V = new LoadInst(PN->getType(), SpillSlot,
+                            Twine(PN->getName(), ".wineh.reload"),
                             &*PHIBlock->getFirstInsertionPt());
     PN->replaceAllUsesWith(V);
     return SpillSlot;
@@ -1222,14 +1222,16 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
     Value *&Load = Loads[IncomingBlock];
     // Insert the load into the predecessor block
     if (!Load)
-      Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
-                          /*Volatile=*/false, IncomingBlock->getTerminator());
+      Load = new LoadInst(V->getType(), SpillSlot,
+                          Twine(V->getName(), ".wineh.reload"),
+                          /*isVolatile=*/false, IncomingBlock->getTerminator());
 
     U.set(Load);
   } else {
     // Reload right before the old use.
-    auto *Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
-                              /*Volatile=*/false, UsingInst);
+    auto *Load = new LoadInst(V->getType(), SpillSlot,
+                              Twine(V->getName(), ".wineh.reload"),
+                              /*isVolatile=*/false, UsingInst);
     U.set(Load);
   }
 }
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 32a7457c2060..19c59e9542b4 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -1,9 +1,8 @@
 //===- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. --===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -111,6 +110,8 @@ void XRayInstrumentation::replaceRetWithPatchableRet(
         for (auto &MO : T.operands())
           MIB.add(MO);
         Terminators.push_back(&T);
+        if (T.isCall())
+          MF.updateCallSiteInfo(&T);
       }
     }
   }
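The ValueTypes.cpp hunks above only extend two switch statements, but the round trip they implement is easy to see in isolation. A minimal standalone sketch (not part of the patch; it assumes a tree that already carries the new MVT::v3i32 enumerator added here):

    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      // With the new enumerators, a 3 x i32 vector is a "simple" MVT, so
      // getEVTString() hits the new switch case instead of the generic path.
      EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 3);
      outs() << VT.getEVTString() << "\n"; // prints "v3i32"
      // getTypeForEVT() maps it back to the IR type <3 x i32>.
      VT.getTypeForEVT(Ctx)->print(outs());
      outs() << "\n";
      return 0;
    }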
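For readers following the WasmEHPrepare.cpp hunks, this is roughly the setup the reworked prepareEHPad() emits into a catch pad that needs LSDA information, reassembled from the pass's own pseudocode comments. The helper name and parameter list are illustrative only, not part of the patch; the intrinsic names and the __wasm_lpad_context fields come from the hunks above:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Illustrative helper: record the landing pad index and LSDA address in
    // __wasm_lpad_context, then let the personality wrapper compute the
    // selector (which the pass later loads from the selector field).
    static void emitLPadContextSetup(Module &M, IRBuilder<> &IRB, Value *Exn,
                                     Value *LPadIndexField, Value *LSDAField,
                                     FunctionCallee CallPersonalityF,
                                     unsigned Index) {
      // Pseudocode: __wasm_lpad_context.lpad_index = index;
      IRB.CreateStore(IRB.getInt32(Index), LPadIndexField);
      // Pseudocode: __wasm_lpad_context.lsda = wasm.lsda();
      Function *LSDAF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_lsda);
      IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
      // Pseudocode: _Unwind_CallPersonality(exn);
      CallInst *PersCI = IRB.CreateCall(CallPersonalityF, {Exn});
      PersCI->setDoesNotThrow();
    }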
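The WinEHPrepare.cpp hunks are mechanical fallout from LoadInst's constructor now taking the result type explicitly rather than deriving it from the pointer operand's pointee type. A sketch of the before/after shape, with illustrative names (the signature is the one the patch itself uses):

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Reload a spilled value from its stack slot right before InsertPt.
    static Value *reloadBefore(AllocaInst *SpillSlot, Instruction *InsertPt) {
      // Old form (result type inferred from SpillSlot's pointee type):
      //   new LoadInst(SpillSlot, "reload", /*Volatile=*/false, InsertPt);
      // New form (result type passed explicitly, ready for opaque pointers):
      return new LoadInst(SpillSlot->getAllocatedType(), SpillSlot, "reload",
                          /*isVolatile=*/false, InsertPt);
    }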