author      Dimitry Andric <dim@FreeBSD.org>        2015-12-30 13:13:10 +0000
committer   Dimitry Andric <dim@FreeBSD.org>        2015-12-30 13:13:10 +0000
commit      7d523365ff1a3cc95bc058b33102500f61e8166d (patch)
tree        b466a4817f79516eb1df8eae92bccf62ecc84003 /contrib/llvm/lib/Target/WebAssembly
parent      e3b65fde506060bec5cd110fcf03b440bd0eea1d (diff)
parent      dd58ef019b700900793a1eb48b52123db01b654e (diff)
Update llvm to trunk r256633.
Notes:
svn path=/projects/clang380-import/; revision=292941
Diffstat (limited to 'contrib/llvm/lib/Target/WebAssembly')
57 files changed, 7385 insertions, 338 deletions
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp index fbb985aaafbb..7ce3a00ae360 100644 --- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #include "InstPrinter/WebAssemblyInstPrinter.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -21,11 +23,13 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include <cctype> +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" +#include "WebAssemblyGenAsmWriter.inc" + WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) @@ -33,11 +37,93 @@ WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI, void WebAssemblyInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - llvm_unreachable("TODO: implement printRegName"); + assert(RegNo != WebAssemblyFunctionInfo::UnusedReg); + // Note that there's an implicit get_local/set_local here! + OS << "$" << RegNo; } void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, - const MCSubtargetInfo &STI) { - llvm_unreachable("TODO: implement printInst"); + const MCSubtargetInfo & /*STI*/) { + // Print the instruction (this uses the AsmStrings from the .td files). + printInstruction(MI, OS); + + // Print any additional variadic operands. + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + if (Desc.isVariadic()) + for (auto i = Desc.getNumOperands(), e = MI->getNumOperands(); i < e; ++i) { + if (i != 0) + OS << ", "; + printOperand(MI, i, OS); + } + + // Print any added annotation. + printAnnotation(OS, Annot); +} + +static std::string toString(const APFloat &FP) { + static const size_t BufBytes = 128; + char buf[BufBytes]; + if (FP.isNaN()) + assert((FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) || + FP.bitwiseIsEqual( + APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) && + "convertToHexString handles neither SNaN nor NaN payloads"); + // Use C99's hexadecimal floating-point representation. + auto Written = FP.convertToHexString( + buf, /*hexDigits=*/0, /*upperCase=*/false, APFloat::rmNearestTiesToEven); + (void)Written; + assert(Written != 0); + assert(Written < BufBytes); + return buf; +} + +void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned WAReg = Op.getReg(); + if (int(WAReg) >= 0) + printRegName(O, WAReg); + else if (OpNo >= MII.get(MI->getOpcode()).getNumDefs()) + O << "$pop" << (WAReg & INT32_MAX); + else if (WAReg != WebAssemblyFunctionInfo::UnusedReg) + O << "$push" << (WAReg & INT32_MAX); + else + O << "$discard"; + // Add a '=' suffix if this is a def. 
+ if (OpNo < MII.get(MI->getOpcode()).getNumDefs()) + O << '='; + } else if (Op.isImm()) { + switch (MI->getOpcode()) { + case WebAssembly::PARAM: + case WebAssembly::RESULT: + case WebAssembly::LOCAL: + O << WebAssembly::TypeToString(MVT::SimpleValueType(Op.getImm())); + break; + default: + O << Op.getImm(); + break; + } + } else if (Op.isFPImm()) + O << toString(APFloat(Op.getFPImm())); + else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + Op.getExpr()->print(O, &MAI); + } +} + +const char *llvm::WebAssembly::TypeToString(MVT Ty) { + switch (Ty.SimpleTy) { + case MVT::i32: + return "i32"; + case MVT::i64: + return "i64"; + case MVT::f32: + return "f32"; + case MVT::f64: + return "f64"; + default: + llvm_unreachable("unsupported type"); + } } diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index 70fcef214ce2..39a16f59fd78 100644 --- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -16,14 +16,13 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineValueType.h" namespace llvm { -class MCOperand; class MCSubtargetInfo; -class WebAssemblyInstPrinter : public MCInstPrinter { +class WebAssemblyInstPrinter final : public MCInstPrinter { public: WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI); @@ -31,8 +30,21 @@ public: void printRegName(raw_ostream &OS, unsigned RegNo) const override; void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, const MCSubtargetInfo &STI) override; + + // Used by tblegen code. + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + // Autogenerated by tblgen. + void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); }; +namespace WebAssembly { + +const char *TypeToString(MVT Ty); + +} // end namespace WebAssembly + } // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp new file mode 100644 index 000000000000..b158ccb46f99 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -0,0 +1,103 @@ +//===-- WebAssemblyAsmBackend.cpp - WebAssembly Assembler Backend ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements the WebAssemblyAsmBackend class. 
+/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { +class WebAssemblyAsmBackend final : public MCAsmBackend { + bool Is64Bit; + +public: + explicit WebAssemblyAsmBackend(bool Is64Bit) + : MCAsmBackend(), Is64Bit(Is64Bit) {} + ~WebAssemblyAsmBackend() override {} + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value, bool IsPCRel) const override; + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + + // No instruction requires relaxation + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + return false; + } + + unsigned getNumFixupKinds() const override { + // We currently just use the generic fixups in MCFixup.h and don't have any + // target-specific fixups. + return 0; + } + + bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } + + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {} + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; +}; + +bool WebAssemblyAsmBackend::writeNopData(uint64_t Count, + MCObjectWriter *OW) const { + if (Count == 0) + return true; + + // FIXME: Do something. + return false; +} + +void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { + const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind()); + unsigned NumBytes = RoundUpToAlignment(Info.TargetSize, 8); + if (!Value) + return; // Doesn't change encoding. + + // Shift the value into position. + Value <<= Info.TargetOffset; + + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the + // bits from the fixup value. + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); +} + +MCObjectWriter * +WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { + return createWebAssemblyELFObjectWriter(OS, Is64Bit, 0); +} +} // end anonymous namespace + +MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, + StringRef CPU) { + return new WebAssemblyAsmBackend(TT.isArch64Bit()); +} diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp new file mode 100644 index 000000000000..c47a3d9094e5 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp @@ -0,0 +1,54 @@ +//===-- WebAssemblyELFObjectWriter.cpp - WebAssembly ELF Writer -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file handles ELF-specific object emission, converting LLVM's +/// internal fixups into the appropriate relocations. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +namespace { +class WebAssemblyELFObjectWriter final : public MCELFObjectTargetWriter { +public: + WebAssemblyELFObjectWriter(bool Is64Bit, uint8_t OSABI); + +protected: + unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel) const override; +}; +} // end anonymous namespace + +// FIXME: Use EM_NONE as a temporary hack. Should we decide to pursue ELF +// writing seriously, we should email generic-abi@googlegroups.com and ask +// for our own ELF code. +WebAssemblyELFObjectWriter::WebAssemblyELFObjectWriter(bool Is64Bit, + uint8_t OSABI) + : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_NONE, + /*HasRelocationAddend=*/true) {} + +unsigned WebAssemblyELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + // FIXME: Do we need our own relocs? + return Fixup.getKind(); +} + +MCObjectWriter *llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = + new WebAssemblyELFObjectWriter(Is64Bit, OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +} diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp index 55346f71c6fc..d2617796ca99 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp @@ -23,7 +23,7 @@ using namespace llvm; WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {} WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { - PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit(); + PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; // TODO: What should MaxInstLength be? @@ -41,9 +41,6 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { COMMDirectiveAlignmentIsInBytes = false; LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment; - HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - SupportsDebugInformation = true; // For now, WebAssembly does not support exceptions. 
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h index d2b8fb7748fc..2dcf2cd3c892 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h @@ -15,13 +15,13 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAsmInfoELF.h" namespace llvm { class Triple; -class WebAssemblyMCAsmInfo final : public MCAsmInfo { +class WebAssemblyMCAsmInfo final : public MCAsmInfoELF { public: explicit WebAssemblyMCAsmInfo(const Triple &T); ~WebAssemblyMCAsmInfo() override; diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp new file mode 100644 index 000000000000..7c6c79eb5db2 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -0,0 +1,100 @@ +//=- WebAssemblyMCCodeEmitter.cpp - Convert WebAssembly code to machine code -// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements the WebAssemblyMCCodeEmitter class. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "mccodeemitter" + +namespace { +class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { + const MCRegisterInfo &MRI; + +public: + WebAssemblyMCCodeEmitter(const MCInstrInfo &, const MCRegisterInfo &mri, + MCContext &) + : MRI(mri) {} + + ~WebAssemblyMCCodeEmitter() override {} + + /// TableGen'erated function for getting the binary encoding for an + /// instruction. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + /// Return binary encoding of operand. If the machine operand requires + /// relocation, record the relocation and return zero. 
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; +}; +} // end anonymous namespace + +MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new WebAssemblyMCCodeEmitter(MCII, MRI, Ctx); +} + +unsigned WebAssemblyMCCodeEmitter::getMachineOpValue( + const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + if (MO.isReg()) + return MRI.getEncodingValue(MO.getReg()); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + + assert(MO.isExpr()); + + assert(MO.getExpr()->getKind() == MCExpr::SymbolRef); + + assert(false && "FIXME: not implemented yet"); + + return 0; +} + +void WebAssemblyMCCodeEmitter::encodeInstruction( + const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + assert(false && "FIXME: not implemented yet"); +} + +// Encode WebAssembly Memory Operand +uint64_t +WebAssemblyMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + assert(false && "FIXME: not implemented yet"); + return 0; +} + +#include "WebAssemblyGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 224aa773a80e..14cd295353d5 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -26,25 +26,40 @@ using namespace llvm; #define DEBUG_TYPE "wasm-mc-target-desc" +#define GET_INSTRINFO_MC_DESC +#include "WebAssemblyGenInstrInfo.inc" + #define GET_SUBTARGETINFO_MC_DESC #include "WebAssemblyGenSubtargetInfo.inc" #define GET_REGINFO_MC_DESC #include "WebAssemblyGenRegisterInfo.inc" -static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo &MRI, +static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo & /*MRI*/, const Triple &TT) { - MCAsmInfo *MAI = new WebAssemblyMCAsmInfo(TT); - return MAI; + return new WebAssemblyMCAsmInfo(TT); +} + +static MCInstrInfo *createWebAssemblyMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitWebAssemblyMCInstrInfo(X); + return X; +} + +static MCStreamer *createWebAssemblyMCStreamer(const Triple &T, MCContext &Ctx, + MCAsmBackend &MAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll) { + return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); } static MCInstPrinter * -createWebAssemblyMCInstPrinter(const Triple &T, unsigned SyntaxVariant, +createWebAssemblyMCInstPrinter(const Triple & /*T*/, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) { - if (SyntaxVariant == 0 || SyntaxVariant == 1) - return new WebAssemblyInstPrinter(MAI, MII, MRI); - return nullptr; + assert(SyntaxVariant == 0); + return new WebAssemblyInstPrinter(MAI, MII, MRI); } // Force static initialization. @@ -53,7 +68,19 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() { // Register the MC asm info. 
RegisterMCAsmInfoFn X(*T, createWebAssemblyMCAsmInfo); + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createWebAssemblyMCInstrInfo); + + // Register the object streamer + TargetRegistry::RegisterELFStreamer(*T, createWebAssemblyMCStreamer); + // Register the MCInstPrinter. TargetRegistry::RegisterMCInstPrinter(*T, createWebAssemblyMCInstPrinter); + + // Register the MC code emitter + TargetRegistry::RegisterMCCodeEmitter(*T, createWebAssemblyMCCodeEmitter); + + // Register the ASM Backend + TargetRegistry::RegisterMCAsmBackend(*T, createWebAssemblyAsmBackend); } } diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index eebf5b72f62b..e78f73e3da95 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -16,7 +16,6 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H #include "llvm/Support/DataTypes.h" -#include <string> namespace llvm { @@ -34,13 +33,21 @@ class StringRef; class Target; class Triple; class raw_ostream; +class raw_pwrite_stream; extern Target TheWebAssemblyTarget32; extern Target TheWebAssemblyTarget64; +MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx); + MCAsmBackend *createWebAssemblyAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); + const Triple &TT, StringRef CPU); + +MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint8_t OSABI); } // end namespace llvm @@ -50,6 +57,11 @@ MCAsmBackend *createWebAssemblyAsmBackend(const Target &T, #define GET_REGINFO_ENUM #include "WebAssemblyGenRegisterInfo.inc" +// Defines symbolic names for the WebAssembly instructions. +// +#define GET_INSTRINFO_ENUM +#include "WebAssemblyGenInstrInfo.inc" + #define GET_SUBTARGETINFO_ENUM #include "WebAssemblyGenSubtargetInfo.inc" diff --git a/contrib/llvm/lib/Target/WebAssembly/README.txt b/contrib/llvm/lib/Target/WebAssembly/README.txt index 63e02c455895..b97ea454165c 100644 --- a/contrib/llvm/lib/Target/WebAssembly/README.txt +++ b/contrib/llvm/lib/Target/WebAssembly/README.txt @@ -12,6 +12,16 @@ binary encoding of WebAssembly itself: * https://github.com/WebAssembly/design/blob/master/AstSemantics.md * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +The backend is built, tested and archived on the following waterfall: + https://build.chromium.org/p/client.wasm.llvm/console + +The backend's bringup is done using the GCC torture test suite first since it +doesn't require C library support. Current known failures are in +known_gcc_test_failures.txt, all other tests should pass. The waterfall will +turn red if not. Once most of these pass, further testing will use LLVM's own +test suite. The tests can be run locally using: + github.com/WebAssembly/experimental/blob/master/buildbot/torture_test.py + Interesting work that remains to be done: * Write a pass to restructurize irreducible control flow. This needs to be done before register allocation to be efficient, because it may duplicate basic @@ -19,8 +29,60 @@ Interesting work that remains to be done: level. Note that LLVM's GPU code has such a pass, but it linearizes control flow (e.g. both sides of branches execute and are masked) which is undesirable for WebAssembly. 
-* Basic relooper to expose control flow as an AST. -* Figure out how to properly use MC for virtual ISAs. This may require some - refactoring of MC. + +//===---------------------------------------------------------------------===// + +set_local instructions have a return value. We should (a) model this, +and (b) write optimizations which take advantage of it. Keep in mind that +many set_local instructions are implicit! + +//===---------------------------------------------------------------------===// + +Br, br_if, and tableswitch instructions can support having a value on the +expression stack across the jump (sometimes). We should (a) model this, and +(b) extend the stackifier to utilize it. + +//===---------------------------------------------------------------------===// + +The min/max operators aren't exactly a<b?a:b because of NaN and negative zero +behavior. The ARM target has the same kind of min/max instructions and has +implemented optimizations for them; we should do similar optimizations for +WebAssembly. + +//===---------------------------------------------------------------------===// + +AArch64 runs SeparateConstOffsetFromGEPPass, followed by EarlyCSE and LICM. +Would these be useful to run for WebAssembly too? Also, it has an option to +run SimplifyCFG after running the AtomicExpand pass. Would this be useful for +us too? + +//===---------------------------------------------------------------------===// + +When is it profitable to set isAsCheapAsAMove on instructions in WebAssembly? + +//===---------------------------------------------------------------------===// + +Register stackification uses the EXPR_STACK physical register to impose +ordering dependencies on instructions with stack operands. This is pessimistic; +we should consider alternate ways to model stack dependencies. + +//===---------------------------------------------------------------------===// + +Lots of things could be done in WebAssemblyTargetTransformInfo.cpp. Similarly, +there are numerous optimization-related hooks that can be overridden in +WebAssemblyTargetLowering. + +//===---------------------------------------------------------------------===// + +Instead of the OptimizeReturned pass, which should consider preserving the +"returned" attribute through to MachineInstrs and extending the StoreResults +pass to do this optimization on calls too. That would also let the +WebAssemblyPeephole pass clean up dead defs for such calls, as it does for +stores. + +//===---------------------------------------------------------------------===// + +Memset/memcpy/memmove should be marked with the "returned" attribute somehow, +even when they are translated through intrinsics. //===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/WebAssembly/Relooper.cpp b/contrib/llvm/lib/Target/WebAssembly/Relooper.cpp new file mode 100644 index 000000000000..9b718ef094aa --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/Relooper.cpp @@ -0,0 +1,984 @@ +//===-- Relooper.cpp - Top-level interface for WebAssembly ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// \brief This implements the Relooper algorithm. This implementation includes +/// optimizations added since the original academic paper [1] was published. 
+/// +/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In +/// Proceedings of the ACM international conference companion on Object +/// oriented programming systems languages and applications companion +/// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 +/// http://doi.acm.org/10.1145/2048147.2048224 +/// +//===-------------------------------------------------------------------===// + +#include "Relooper.h" +#include "WebAssembly.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <cstring> +#include <cstdlib> +#include <functional> +#include <list> +#include <stack> +#include <string> + +#define DEBUG_TYPE "relooper" + +using namespace llvm; +using namespace Relooper; + +static cl::opt<int> RelooperSplittingFactor( + "relooper-splitting-factor", + cl::desc( + "How much to discount code size when deciding whether to split a node"), + cl::init(5)); + +static cl::opt<unsigned> RelooperMultipleSwitchThreshold( + "relooper-multiple-switch-threshold", + cl::desc( + "How many entries to allow in a multiple before we use a switch"), + cl::init(10)); + +static cl::opt<unsigned> RelooperNestingLimit( + "relooper-nesting-limit", + cl::desc( + "How much nesting is acceptable"), + cl::init(20)); + + +namespace { +/// +/// Implements the relooper algorithm for a function's blocks. +/// +/// Implementation details: The Relooper instance has +/// ownership of the blocks and shapes, and frees them when done. +/// +struct RelooperAlgorithm { + std::deque<Block *> Blocks; + std::deque<Shape *> Shapes; + Shape *Root; + bool MinSize; + int BlockIdCounter; + int ShapeIdCounter; + + RelooperAlgorithm(); + ~RelooperAlgorithm(); + + void AddBlock(Block *New, int Id = -1); + + // Calculates the shapes + void Calculate(Block *Entry); + + // Sets us to try to minimize size + void SetMinSize(bool MinSize_) { MinSize = MinSize_; } +}; + +struct RelooperAnalysis final : public FunctionPass { + static char ID; + RelooperAnalysis() : FunctionPass(ID) {} + const char *getPassName() const override { return "relooper"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + bool runOnFunction(Function &F) override; +}; +} + +// RelooperAnalysis + +char RelooperAnalysis::ID = 0; +FunctionPass *llvm::createWebAssemblyRelooper() { + return new RelooperAnalysis(); +} + +bool RelooperAnalysis::runOnFunction(Function &F) { + DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n"); + RelooperAlgorithm R; + // FIXME: remove duplication between relooper's and LLVM's BBs. + std::map<const BasicBlock *, Block *> BB2B; + std::map<const Block *, const BasicBlock *> B2BB; + for (const BasicBlock &BB : F) { + // FIXME: getName is wrong here, Code is meant to represent amount of code. + // FIXME: use BranchVarInit for switch. + Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr); + R.AddBlock(B); + assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice"); + assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice"); + BB2B[&BB] = B; + B2BB[B] = &BB; + } + for (Block *B : R.Blocks) { + const BasicBlock *BB = B2BB[B]; + for (const BasicBlock *Successor : successors(BB)) + // FIXME: add branch's Condition and Code below. 
+ B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr); + } + R.Calculate(BB2B[&F.getEntryBlock()]); + return false; // Analysis passes don't modify anything. +} + +// Helpers + +typedef MapVector<Block *, BlockSet> BlockBlockSetMap; +typedef std::list<Block *> BlockList; + +template <class T, class U> +static bool contains(const T &container, const U &contained) { + return container.count(contained); +} + + +// Branch + +Branch::Branch(const char *ConditionInit, const char *CodeInit) + : Ancestor(nullptr), Labeled(true) { + // FIXME: move from char* to LLVM data structures + Condition = ConditionInit ? strdup(ConditionInit) : nullptr; + Code = CodeInit ? strdup(CodeInit) : nullptr; +} + +Branch::~Branch() { + // FIXME: move from char* to LLVM data structures + free(static_cast<void *>(const_cast<char *>(Condition))); + free(static_cast<void *>(const_cast<char *>(Code))); +} + +// Block + +Block::Block(const char *CodeInit, const char *BranchVarInit) + : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) { + // FIXME: move from char* to LLVM data structures + Code = strdup(CodeInit); + BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr; +} + +Block::~Block() { + // FIXME: move from char* to LLVM data structures + free(static_cast<void *>(const_cast<char *>(Code))); + free(static_cast<void *>(const_cast<char *>(BranchVar))); +} + +void Block::AddBranchTo(Block *Target, const char *Condition, + const char *Code) { + assert(!contains(BranchesOut, Target) && + "cannot add more than one branch to the same target"); + BranchesOut[Target] = make_unique<Branch>(Condition, Code); +} + +// Relooper + +RelooperAlgorithm::RelooperAlgorithm() + : Root(nullptr), MinSize(false), BlockIdCounter(1), + ShapeIdCounter(0) { // block ID 0 is reserved for clearings +} + +RelooperAlgorithm::~RelooperAlgorithm() { + for (auto Curr : Blocks) + delete Curr; + for (auto Curr : Shapes) + delete Curr; +} + +void RelooperAlgorithm::AddBlock(Block *New, int Id) { + New->Id = Id == -1 ? BlockIdCounter++ : Id; + Blocks.push_back(New); +} + +struct RelooperRecursor { + RelooperAlgorithm *Parent; + RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} +}; + +void RelooperAlgorithm::Calculate(Block *Entry) { + // Scan and optimize the input + struct PreOptimizer : public RelooperRecursor { + PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} + BlockSet Live; + + void FindLive(Block *Root) { + BlockList ToInvestigate; + ToInvestigate.push_back(Root); + while (!ToInvestigate.empty()) { + Block *Curr = ToInvestigate.front(); + ToInvestigate.pop_front(); + if (contains(Live, Curr)) + continue; + Live.insert(Curr); + for (const auto &iter : Curr->BranchesOut) + ToInvestigate.push_back(iter.first); + } + } + + // If a block has multiple entries but no exits, and it is small enough, it + // is useful to split it. A common example is a C++ function where + // everything ends up at a final exit block and does some RAII cleanup. 
+ // Without splitting, we will be forced to introduce labelled loops to + // allow reaching the final block + void SplitDeadEnds() { + unsigned TotalCodeSize = 0; + for (const auto &Curr : Live) { + TotalCodeSize += strlen(Curr->Code); + } + BlockSet Splits; + BlockSet Removed; + for (const auto &Original : Live) { + if (Original->BranchesIn.size() <= 1 || + !Original->BranchesOut.empty()) + continue; // only dead ends, for now + if (contains(Original->BranchesOut, Original)) + continue; // cannot split a looping node + if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) > + TotalCodeSize / RelooperSplittingFactor) + continue; // if splitting increases raw code size by a significant + // amount, abort + // Split the node (for simplicity, we replace all the blocks, even + // though we could have reused the original) + DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n"); + for (const auto &Prior : Original->BranchesIn) { + Block *Split = new Block(Original->Code, Original->BranchVar); + Parent->AddBlock(Split, Original->Id); + Split->BranchesIn.insert(Prior); + std::unique_ptr<Branch> Details; + Details.swap(Prior->BranchesOut[Original]); + Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition, + Details->Code); + for (const auto &iter : Original->BranchesOut) { + Block *Post = iter.first; + Branch *Details = iter.second.get(); + Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition, + Details->Code); + Post->BranchesIn.insert(Split); + } + Splits.insert(Split); + Removed.insert(Original); + } + for (const auto &iter : Original->BranchesOut) { + Block *Post = iter.first; + Post->BranchesIn.remove(Original); + } + } + for (const auto &iter : Splits) + Live.insert(iter); + for (const auto &iter : Removed) + Live.remove(iter); + } + }; + PreOptimizer Pre(this); + Pre.FindLive(Entry); + + // Add incoming branches from live blocks, ignoring dead code + for (unsigned i = 0; i < Blocks.size(); i++) { + Block *Curr = Blocks[i]; + if (!contains(Pre.Live, Curr)) + continue; + for (const auto &iter : Curr->BranchesOut) + iter.first->BranchesIn.insert(Curr); + } + + if (!MinSize) + Pre.SplitDeadEnds(); + + // Recursively process the graph + + struct Analyzer : public RelooperRecursor { + Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} + + // Add a shape to the list of shapes in this Relooper calculation + void Notice(Shape *New) { + New->Id = Parent->ShapeIdCounter++; + Parent->Shapes.push_back(New); + } + + // Create a list of entries from a block. 
If LimitTo is provided, only + // results in that set will appear + void GetBlocksOut(Block *Source, BlockSet &Entries, + BlockSet *LimitTo = nullptr) { + for (const auto &iter : Source->BranchesOut) + if (!LimitTo || contains(*LimitTo, iter.first)) + Entries.insert(iter.first); + } + + // Converts/processes all branchings to a specific target + void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor, + BlockSet &From) { + DEBUG(dbgs() << " Solipsize '" << Target->Code << "' type " << Type + << "\n"); + for (auto iter = Target->BranchesIn.begin(); + iter != Target->BranchesIn.end();) { + Block *Prior = *iter; + if (!contains(From, Prior)) { + iter++; + continue; + } + std::unique_ptr<Branch> PriorOut; + PriorOut.swap(Prior->BranchesOut[Target]); + PriorOut->Ancestor = Ancestor; + PriorOut->Type = Type; + if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor)) + Multiple->Breaks++; // We are breaking out of this Multiple, so need a + // loop + iter++; // carefully increment iter before erasing + Target->BranchesIn.remove(Prior); + Target->ProcessedBranchesIn.insert(Prior); + Prior->ProcessedBranchesOut[Target].swap(PriorOut); + } + } + + Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) { + DEBUG(dbgs() << " MakeSimple inner block '" << Inner->Code << "'\n"); + SimpleShape *Simple = new SimpleShape; + Notice(Simple); + Simple->Inner = Inner; + Inner->Parent = Simple; + if (Blocks.size() > 1) { + Blocks.remove(Inner); + GetBlocksOut(Inner, NextEntries, &Blocks); + BlockSet JustInner; + JustInner.insert(Inner); + for (const auto &iter : NextEntries) + Solipsize(iter, Branch::Direct, Simple, JustInner); + } + return Simple; + } + + Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries, + BlockSet &NextEntries) { + // Find the inner blocks in this loop. Proceed backwards from the entries + // until + // you reach a seen block, collecting as you go. + BlockSet InnerBlocks; + BlockSet Queue = Entries; + while (!Queue.empty()) { + Block *Curr = *(Queue.begin()); + Queue.remove(*Queue.begin()); + if (!contains(InnerBlocks, Curr)) { + // This element is new, mark it as inner and remove from outer + InnerBlocks.insert(Curr); + Blocks.remove(Curr); + // Add the elements prior to it + for (const auto &iter : Curr->BranchesIn) + Queue.insert(iter); + } + } + assert(!InnerBlocks.empty()); + + for (const auto &Curr : InnerBlocks) { + for (const auto &iter : Curr->BranchesOut) { + Block *Possible = iter.first; + if (!contains(InnerBlocks, Possible)) + NextEntries.insert(Possible); + } + } + + LoopShape *Loop = new LoopShape(); + Notice(Loop); + + // Solipsize the loop, replacing with break/continue and marking branches + // as Processed (will not affect later calculations) + // A. Branches to the loop entries become a continue to this shape + for (const auto &iter : Entries) + Solipsize(iter, Branch::Continue, Loop, InnerBlocks); + // B. Branches to outside the loop (a next entry) become breaks on this + // shape + for (const auto &iter : NextEntries) + Solipsize(iter, Branch::Break, Loop, InnerBlocks); + // Finish up + Shape *Inner = Process(InnerBlocks, Entries, nullptr); + Loop->Inner = Inner; + return Loop; + } + + // For each entry, find the independent group reachable by it. The + // independent group is the entry itself, plus all the blocks it can + // reach that cannot be directly reached by another entry. Note that we + // ignore directly reaching the entry itself by another entry. 
+ // @param Ignore - previous blocks that are irrelevant + void FindIndependentGroups(BlockSet &Entries, + BlockBlockSetMap &IndependentGroups, + BlockSet *Ignore = nullptr) { + typedef std::map<Block *, Block *> BlockBlockMap; + + struct HelperClass { + BlockBlockSetMap &IndependentGroups; + BlockBlockMap Ownership; // For each block, which entry it belongs to. + // We have reached it from there. + + HelperClass(BlockBlockSetMap &IndependentGroupsInit) + : IndependentGroups(IndependentGroupsInit) {} + void InvalidateWithChildren(Block *New) { + // Being in the list means you need to be invalidated + BlockList ToInvalidate; + ToInvalidate.push_back(New); + while (!ToInvalidate.empty()) { + Block *Invalidatee = ToInvalidate.front(); + ToInvalidate.pop_front(); + Block *Owner = Ownership[Invalidatee]; + // Owner may have been invalidated, do not add to + // IndependentGroups! + if (contains(IndependentGroups, Owner)) + IndependentGroups[Owner].remove(Invalidatee); + if (Ownership[Invalidatee]) { // may have been seen before and + // invalidated already + Ownership[Invalidatee] = nullptr; + for (const auto &iter : Invalidatee->BranchesOut) { + Block *Target = iter.first; + BlockBlockMap::iterator Known = Ownership.find(Target); + if (Known != Ownership.end()) { + Block *TargetOwner = Known->second; + if (TargetOwner) + ToInvalidate.push_back(Target); + } + } + } + } + } + }; + HelperClass Helper(IndependentGroups); + + // We flow out from each of the entries, simultaneously. + // When we reach a new block, we add it as belonging to the one we got to + // it from. + // If we reach a new block that is already marked as belonging to someone, + // it is reachable by two entries and is not valid for any of them. + // Remove it and all it can reach that have been visited. + + // Being in the queue means we just added this item, and + // we need to add its children + BlockList Queue; + for (const auto &Entry : Entries) { + Helper.Ownership[Entry] = Entry; + IndependentGroups[Entry].insert(Entry); + Queue.push_back(Entry); + } + while (!Queue.empty()) { + Block *Curr = Queue.front(); + Queue.pop_front(); + Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership + // map if we are in the queue + if (!Owner) + continue; // we have been invalidated meanwhile after being reached + // from two entries + // Add all children + for (const auto &iter : Curr->BranchesOut) { + Block *New = iter.first; + BlockBlockMap::iterator Known = Helper.Ownership.find(New); + if (Known == Helper.Ownership.end()) { + // New node. Add it, and put it in the queue + Helper.Ownership[New] = Owner; + IndependentGroups[Owner].insert(New); + Queue.push_back(New); + continue; + } + Block *NewOwner = Known->second; + if (!NewOwner) + continue; // We reached an invalidated node + if (NewOwner != Owner) + // Invalidate this and all reachable that we have seen - we reached + // this from two locations + Helper.InvalidateWithChildren(New); + // otherwise, we have the same owner, so do nothing + } + } + + // Having processed all the interesting blocks, we remain with just one + // potential issue: + // If a->b, and a was invalidated, but then b was later reached by + // someone else, we must invalidate b. To check for this, we go over all + // elements in the independent groups, if an element has a parent which + // does *not* have the same owner, we/ must remove it and all its + // children. 
+ + for (const auto &iter : Entries) { + BlockSet &CurrGroup = IndependentGroups[iter]; + BlockList ToInvalidate; + for (const auto &iter : CurrGroup) { + Block *Child = iter; + for (const auto &iter : Child->BranchesIn) { + Block *Parent = iter; + if (Ignore && contains(*Ignore, Parent)) + continue; + if (Helper.Ownership[Parent] != Helper.Ownership[Child]) + ToInvalidate.push_back(Child); + } + } + while (!ToInvalidate.empty()) { + Block *Invalidatee = ToInvalidate.front(); + ToInvalidate.pop_front(); + Helper.InvalidateWithChildren(Invalidatee); + } + } + + // Remove empty groups + for (const auto &iter : Entries) + if (IndependentGroups[iter].empty()) + IndependentGroups.erase(iter); + } + + Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries, + BlockBlockSetMap &IndependentGroups, Shape *Prev, + BlockSet &NextEntries) { + bool Fused = isa<SimpleShape>(Prev); + MultipleShape *Multiple = new MultipleShape(); + Notice(Multiple); + BlockSet CurrEntries; + for (auto &iter : IndependentGroups) { + Block *CurrEntry = iter.first; + BlockSet &CurrBlocks = iter.second; + // Create inner block + CurrEntries.clear(); + CurrEntries.insert(CurrEntry); + for (const auto &CurrInner : CurrBlocks) { + // Remove the block from the remaining blocks + Blocks.remove(CurrInner); + // Find new next entries and fix branches to them + for (auto iter = CurrInner->BranchesOut.begin(); + iter != CurrInner->BranchesOut.end();) { + Block *CurrTarget = iter->first; + auto Next = iter; + Next++; + if (!contains(CurrBlocks, CurrTarget)) { + NextEntries.insert(CurrTarget); + Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); + } + iter = Next; // increment carefully because Solipsize can remove us + } + } + Multiple->InnerMap[CurrEntry->Id] = + Process(CurrBlocks, CurrEntries, nullptr); + // If we are not fused, then our entries will actually be checked + if (!Fused) + CurrEntry->IsCheckedMultipleEntry = true; + } + // Add entries not handled as next entries, they are deferred + for (const auto &Entry : Entries) + if (!contains(IndependentGroups, Entry)) + NextEntries.insert(Entry); + // The multiple has been created, we can decide how to implement it + if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) { + Multiple->UseSwitch = true; + Multiple->Breaks++; // switch captures breaks + } + return Multiple; + } + + // Main function. + // Process a set of blocks with specified entries, returns a shape + // The Make* functions receive a NextEntries. If they fill it with data, + // those are the entries for the ->Next block on them, and the blocks + // are what remains in Blocks (which Make* modify). In this way + // we avoid recursing on Next (imagine a long chain of Simples, if we + // recursed we could blow the stack). 
+ Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) { + BlockSet *Entries = &InitialEntries; + BlockSet TempEntries[2]; + int CurrTempIndex = 0; + BlockSet *NextEntries; + Shape *Ret = nullptr; + + auto Make = [&](Shape *Temp) { + if (Prev) + Prev->Next = Temp; + if (!Ret) + Ret = Temp; + Prev = Temp; + Entries = NextEntries; + }; + + while (1) { + CurrTempIndex = 1 - CurrTempIndex; + NextEntries = &TempEntries[CurrTempIndex]; + NextEntries->clear(); + + if (Entries->empty()) + return Ret; + if (Entries->size() == 1) { + Block *Curr = *(Entries->begin()); + if (Curr->BranchesIn.empty()) { + // One entry, no looping ==> Simple + Make(MakeSimple(Blocks, Curr, *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + // One entry, looping ==> Loop + Make(MakeLoop(Blocks, *Entries, *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + + // More than one entry, try to eliminate through a Multiple groups of + // independent blocks from an entry/ies. It is important to remove + // through multiples as opposed to looping since the former is more + // performant. + BlockBlockSetMap IndependentGroups; + FindIndependentGroups(*Entries, IndependentGroups); + + if (!IndependentGroups.empty()) { + // We can handle a group in a multiple if its entry cannot be reached + // by another group. + // Note that it might be reachable by itself - a loop. But that is + // fine, we will create a loop inside the multiple block (which + // is the performant order to do it). + for (auto iter = IndependentGroups.begin(); + iter != IndependentGroups.end();) { + Block *Entry = iter->first; + BlockSet &Group = iter->second; + auto curr = iter++; // iterate carefully, we may delete + for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin(); + iterBranch != Entry->BranchesIn.end(); iterBranch++) { + Block *Origin = *iterBranch; + if (!contains(Group, Origin)) { + // Reached from outside the group, so we cannot handle this + IndependentGroups.erase(curr); + break; + } + } + } + + // As an optimization, if we have 2 independent groups, and one is a + // small dead end, we can handle only that dead end. + // The other then becomes a Next - without nesting in the code and + // recursion in the analysis. + // TODO: if the larger is the only dead end, handle that too + // TODO: handle >2 groups + // TODO: handle not just dead ends, but also that do not branch to the + // NextEntries. However, must be careful there since we create a + // Next, and that Next can prevent eliminating a break (since we no + // longer naturally reach the same place), which may necessitate a + // one-time loop, which makes the unnesting pointless. + if (IndependentGroups.size() == 2) { + // Find the smaller one + auto iter = IndependentGroups.begin(); + Block *SmallEntry = iter->first; + auto SmallSize = iter->second.size(); + iter++; + Block *LargeEntry = iter->first; + auto LargeSize = iter->second.size(); + if (SmallSize != LargeSize) { // ignore the case where they are + // identical - keep things symmetrical + // there + if (SmallSize > LargeSize) { + Block *Temp = SmallEntry; + SmallEntry = LargeEntry; + LargeEntry = Temp; // Note: we did not flip the Sizes too, they + // are now invalid. TODO: use the smaller + // size as a limit? 
+ } + // Check if dead end + bool DeadEnd = true; + BlockSet &SmallGroup = IndependentGroups[SmallEntry]; + for (const auto &Curr : SmallGroup) { + for (const auto &iter : Curr->BranchesOut) { + Block *Target = iter.first; + if (!contains(SmallGroup, Target)) { + DeadEnd = false; + break; + } + } + if (!DeadEnd) + break; + } + if (DeadEnd) + IndependentGroups.erase(LargeEntry); + } + } + + if (!IndependentGroups.empty()) + // Some groups removable ==> Multiple + Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev, + *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + // No independent groups, must be loopable ==> Loop + Make(MakeLoop(Blocks, *Entries, *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + } + }; + + // Main + + BlockSet AllBlocks; + for (const auto &Curr : Pre.Live) { + AllBlocks.insert(Curr); + } + + BlockSet Entries; + Entries.insert(Entry); + Root = Analyzer(this).Process(AllBlocks, Entries, nullptr); + assert(Root); + + /// + /// Relooper post-optimizer + /// + struct PostOptimizer { + RelooperAlgorithm *Parent; + std::stack<Shape *> LoopStack; + + PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} + + void ShapeSwitch(Shape* var, + std::function<void (SimpleShape*)> simple, + std::function<void (MultipleShape*)> multiple, + std::function<void (LoopShape*)> loop) { + switch (var->getKind()) { + case Shape::SK_Simple: { + simple(cast<SimpleShape>(var)); + break; + } + case Shape::SK_Multiple: { + multiple(cast<MultipleShape>(var)); + break; + } + case Shape::SK_Loop: { + loop(cast<LoopShape>(var)); + break; + } + } + } + + // Find the blocks that natural control flow can get us directly to, or + // through a multiple that we ignore + void FollowNaturalFlow(Shape *S, BlockSet &Out) { + ShapeSwitch(S, [&](SimpleShape* Simple) { + Out.insert(Simple->Inner); + }, [&](MultipleShape* Multiple) { + for (const auto &iter : Multiple->InnerMap) { + FollowNaturalFlow(iter.second, Out); + } + FollowNaturalFlow(Multiple->Next, Out); + }, [&](LoopShape* Loop) { + FollowNaturalFlow(Loop->Inner, Out); + }); + } + + void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) { + if (Root->Next) { + Root->Natural = Root->Next; + FindNaturals(Root->Next, Otherwise); + } else { + Root->Natural = Otherwise; + } + + ShapeSwitch(Root, [](SimpleShape* Simple) { + }, [&](MultipleShape* Multiple) { + for (const auto &iter : Multiple->InnerMap) { + FindNaturals(iter.second, Root->Natural); + } + }, [&](LoopShape* Loop){ + FindNaturals(Loop->Inner, Loop->Inner); + }); + } + + // Remove unneeded breaks and continues. + // A flow operation is trivially unneeded if the shape we naturally get to + // by normal code execution is the same as the flow forces us to. + void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr, + LoopShape *LastLoop = nullptr, + unsigned Depth = 0) { + BlockSet NaturalBlocks; + FollowNaturalFlow(Natural, NaturalBlocks); + Shape *Next = Root; + while (Next) { + Root = Next; + Next = nullptr; + ShapeSwitch( + Root, + [&](SimpleShape* Simple) { + if (Simple->Inner->BranchVar) + LastLoop = + nullptr; // a switch clears out the loop (TODO: only for + // breaks, not continue) + + if (Simple->Next) { + if (!Simple->Inner->BranchVar && + Simple->Inner->ProcessedBranchesOut.size() == 2 && + Depth < RelooperNestingLimit) { + // If there is a next block, we already know at Simple + // creation time to make direct branches, and we can do + // nothing more in general. 
But, we try to optimize the + // case of a break and a direct: This would normally be + // if (break?) { break; } .. + // but if we make sure to nest the else, we can save the + // break, + // if (!break?) { .. } + // This is also better because the more canonical nested + // form is easier to further optimize later. The + // downside is more nesting, which adds to size in builds with + // whitespace. + // Note that we avoid switches, as it complicates control flow + // and is not relevant for the common case we optimize here. + bool Found = false; + bool Abort = false; + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Block *Target = iter.first; + Branch *Details = iter.second.get(); + if (Details->Type == Branch::Break) { + Found = true; + if (!contains(NaturalBlocks, Target)) + Abort = true; + } else if (Details->Type != Branch::Direct) + Abort = true; + } + if (Found && !Abort) { + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Branch *Details = iter.second.get(); + if (Details->Type == Branch::Break) { + Details->Type = Branch::Direct; + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) + Multiple->Breaks--; + } else { + assert(Details->Type == Branch::Direct); + Details->Type = Branch::Nested; + } + } + } + Depth++; // this optimization increases depth, for us and all + // our next chain (i.e., until this call returns) + } + Next = Simple->Next; + } else { + // If there is no next then Natural is where we will + // go to by doing nothing, so we can potentially optimize some + // branches to direct. + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Block *Target = iter.first; + Branch *Details = iter.second.get(); + if (Details->Type != Branch::Direct && + contains(NaturalBlocks, + Target)) { // note: cannot handle split blocks + Details->Type = Branch::Direct; + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) + Multiple->Breaks--; + } else if (Details->Type == Branch::Break && LastLoop && + LastLoop->Natural == Details->Ancestor->Natural) { + // it is important to simplify breaks, as simpler breaks + // enable other optimizations + Details->Labeled = false; + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) + Multiple->Breaks--; + } + } + } + }, [&](MultipleShape* Multiple) + { + for (const auto &iter : Multiple->InnerMap) { + RemoveUnneededFlows(iter.second, Multiple->Next, + Multiple->Breaks ? 
nullptr : LastLoop, + Depth + 1); + } + Next = Multiple->Next; + }, [&](LoopShape* Loop) + { + RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1); + Next = Loop->Next; + }); + } + } + + // After we know which loops exist, we can calculate which need to be + // labeled + void FindLabeledLoops(Shape *Root) { + Shape *Next = Root; + while (Next) { + Root = Next; + Next = nullptr; + + ShapeSwitch( + Root, + [&](SimpleShape *Simple) { + MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next); + // If we are fusing a Multiple with a loop into this Simple, then + // visit it now + if (Fused && Fused->Breaks) + LoopStack.push(Fused); + if (Simple->Inner->BranchVar) + LoopStack.push(nullptr); // a switch means breaks are now useless, + // push a dummy + if (Fused) { + if (Fused->UseSwitch) + LoopStack.push(nullptr); // a switch means breaks are now + // useless, push a dummy + for (const auto &iter : Fused->InnerMap) { + FindLabeledLoops(iter.second); + } + } + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Branch *Details = iter.second.get(); + if (Details->Type == Branch::Break || + Details->Type == Branch::Continue) { + assert(!LoopStack.empty()); + if (Details->Ancestor != LoopStack.top() && Details->Labeled) { + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) { + Multiple->Labeled = true; + } else { + LoopShape *Loop = cast<LoopShape>(Details->Ancestor); + Loop->Labeled = true; + } + } else { + Details->Labeled = false; + } + } + if (Fused && Fused->UseSwitch) + LoopStack.pop(); + if (Simple->Inner->BranchVar) + LoopStack.pop(); + if (Fused && Fused->Breaks) + LoopStack.pop(); + if (Fused) + Next = Fused->Next; + else + Next = Root->Next; + } + } + , [&](MultipleShape* Multiple) { + if (Multiple->Breaks) + LoopStack.push(Multiple); + for (const auto &iter : Multiple->InnerMap) + FindLabeledLoops(iter.second); + if (Multiple->Breaks) + LoopStack.pop(); + Next = Root->Next; + } + , [&](LoopShape* Loop) { + LoopStack.push(Loop); + FindLabeledLoops(Loop->Inner); + LoopStack.pop(); + Next = Root->Next; + }); + } + } + + void Process(Shape * Root) { + FindNaturals(Root); + RemoveUnneededFlows(Root); + FindLabeledLoops(Root); + } + }; + + PostOptimizer(this).Process(Root); +} diff --git a/contrib/llvm/lib/Target/WebAssembly/Relooper.h b/contrib/llvm/lib/Target/WebAssembly/Relooper.h new file mode 100644 index 000000000000..7c564de82f34 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/Relooper.h @@ -0,0 +1,186 @@ +//===-- Relooper.h - Top-level interface for WebAssembly ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-------------------------------------------------------------------===// +/// +/// \file +/// \brief This defines an optimized C++ implemention of the Relooper +/// algorithm, originally developed as part of Emscripten, which +/// generates a structured AST from arbitrary control flow. 
+/// +//===-------------------------------------------------------------------===// + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/Casting.h" + +#include <cassert> +#include <cstdarg> +#include <cstdio> +#include <deque> +#include <list> +#include <map> +#include <memory> +#include <set> + +namespace llvm { + +namespace Relooper { + +struct Block; +struct Shape; + +/// +/// Info about a branching from one block to another +/// +struct Branch { + enum FlowType { + Direct = 0, // We will directly reach the right location through other + // means, no need for continue or break + Break = 1, + Continue = 2, + Nested = 3 // This code is directly reached, but we must be careful to + // ensure it is nested in an if - it is not reached + // unconditionally, other code paths exist alongside it that we need to make + // sure do not intertwine + }; + Shape + *Ancestor; // If not nullptr, this shape is the relevant one for purposes + // of getting to the target block. We break or continue on it + Branch::FlowType + Type; // If Ancestor is not nullptr, this says whether to break or + // continue + bool Labeled; // If a break or continue, whether we need to use a label + const char *Condition; // The condition for which we branch. For example, + // "my_var == 1". Conditions are checked one by one. + // One of the conditions should have nullptr as the + // condition, in which case it is the default + // FIXME: move from char* to LLVM data structures + const char *Code; // If provided, code that is run right before the branch is + // taken. This is useful for phis + // FIXME: move from char* to LLVM data structures + + Branch(const char *ConditionInit, const char *CodeInit = nullptr); + ~Branch(); +}; + +typedef SetVector<Block *> BlockSet; +typedef MapVector<Block *, Branch *> BlockBranchMap; +typedef MapVector<Block *, std::unique_ptr<Branch>> OwningBlockBranchMap; + +/// +/// Represents a basic block of code - some instructions that end with a +/// control flow modifier (a branch, return or throw). +/// +struct Block { + // Branches become processed after we finish the shape relevant to them. For + // example, when we recreate a loop, branches to the loop start become + // continues and are now processed. When we calculate what shape to generate + // from a set of blocks, we ignore processed branches. Blocks own the Branch + // objects they use, and destroy them when done. + OwningBlockBranchMap BranchesOut; + BlockSet BranchesIn; + OwningBlockBranchMap ProcessedBranchesOut; + BlockSet ProcessedBranchesIn; + Shape *Parent; // The shape we are directly inside + int Id; // A unique identifier, defined when added to relooper. Note that this + // uniquely identifies a *logical* block - if we split it, the two + // instances have the same content *and* the same Id + const char *Code; // The string representation of the code in this block. 
+ // Owning pointer (we copy the input) + // FIXME: move from char* to LLVM data structures + const char *BranchVar; // A variable whose value determines where we go; if + // this is not nullptr, emit a switch on that variable + // FIXME: move from char* to LLVM data structures + bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching + // us requires setting the label variable + + Block(const char *CodeInit, const char *BranchVarInit); + ~Block(); + + void AddBranchTo(Block *Target, const char *Condition, + const char *Code = nullptr); +}; + +/// +/// Represents a structured control flow shape +/// +struct Shape { + int Id; // A unique identifier. Used to identify loops, labels are Lx where x + // is the Id. Defined when added to relooper + Shape *Next; // The shape that will appear in the code right after this one + Shape *Natural; // The shape that control flow gets to naturally (if there is + // Next, then this is Next) + + /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.) + enum ShapeKind { SK_Simple, SK_Multiple, SK_Loop }; + +private: + ShapeKind Kind; + +public: + ShapeKind getKind() const { return Kind; } + + Shape(ShapeKind KindInit) : Id(-1), Next(nullptr), Kind(KindInit) {} +}; + +/// +/// Simple: No control flow at all, just instructions. +/// +struct SimpleShape : public Shape { + Block *Inner; + + SimpleShape() : Shape(SK_Simple), Inner(nullptr) {} + + static bool classof(const Shape *S) { return S->getKind() == SK_Simple; } +}; + +/// +/// A shape that may be implemented with a labeled loop. +/// +struct LabeledShape : public Shape { + bool Labeled; // If we have a loop, whether it needs to be labeled + + LabeledShape(ShapeKind KindInit) : Shape(KindInit), Labeled(false) {} +}; + +// Blocks with the same id were split and are identical, so we just care about +// ids in Multiple entries +typedef std::map<int, Shape *> IdShapeMap; + +/// +/// Multiple: A shape with more than one entry. If the next block to +/// be entered is among them, we run it and continue to +/// the next shape, otherwise we continue immediately to the +/// next shape. +/// +struct MultipleShape : public LabeledShape { + IdShapeMap InnerMap; // entry block ID -> shape + int Breaks; // If we have branches on us, we need a loop (or a switch). This + // is a counter of requirements, + // if we optimize it to 0, the loop is unneeded + bool UseSwitch; // Whether to switch on label as opposed to an if-else chain + + MultipleShape() : LabeledShape(SK_Multiple), Breaks(0), UseSwitch(false) {} + + static bool classof(const Shape *S) { return S->getKind() == SK_Multiple; } +}; + +/// +/// Loop: An infinite loop. 
+/// +struct LoopShape : public LabeledShape { + Shape *Inner; + + LoopShape() : LabeledShape(SK_Loop), Inner(nullptr) {} + + static bool classof(const Shape *S) { return S->getKind() == SK_Loop; } +}; + +} // namespace Relooper + +} // namespace llvm diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h index 3ff19d46f437..e972da5af74f 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h @@ -23,8 +23,22 @@ namespace llvm { class WebAssemblyTargetMachine; class FunctionPass; +FunctionPass *createWebAssemblyOptimizeReturned(); + FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, CodeGenOpt::Level OptLevel); +FunctionPass *createWebAssemblyArgumentMove(); + +FunctionPass *createWebAssemblyStoreResults(); +FunctionPass *createWebAssemblyRegStackify(); +FunctionPass *createWebAssemblyRegColoring(); +FunctionPass *createWebAssemblyPEI(); +FunctionPass *createWebAssemblyCFGStackify(); +FunctionPass *createWebAssemblyLowerBrUnless(); +FunctionPass *createWebAssemblyRegNumbering(); +FunctionPass *createWebAssemblyPeephole(); + +FunctionPass *createWebAssemblyRelooper(); } // end namespace llvm diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td index a123bf6f66b6..551ad9345154 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -6,10 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This is a target description file for the WebAssembly architecture, which is -// also known as "wasm". -// +/// +/// \file +/// \brief This is a target description file for the WebAssembly architecture, +/// which is also known as "wasm". +/// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -50,6 +51,9 @@ def WebAssemblyInstrInfo : InstrInfo; // Minimal Viable Product. def : ProcessorModel<"mvp", NoSchedModel, []>; +// Generic processor: latest stable version. +def : ProcessorModel<"generic", NoSchedModel, []>; + // Latest and greatest experimental version of WebAssembly. Bugs included! def : ProcessorModel<"bleeding-edge", NoSchedModel, [FeatureSIMD128]>; diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp new file mode 100644 index 000000000000..3893c408cf63 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp @@ -0,0 +1,110 @@ +//===-- WebAssemblyArgumentMove.cpp - Argument instruction moving ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file moves ARGUMENT instructions after ScheduleDAG scheduling. +/// +/// Arguments are really live-in registers, however, since we use virtual +/// registers and LLVM doesn't support live-in virtual registers, we're +/// currently making do with ARGUMENT instructions which are placed at the top +/// of the entry block. The trick is to get them to *stay* at the top of the +/// entry block. 
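// Illustrative sketch of the problem this pass fixes (hypothetical MIR, not
// taken from this patch): if scheduling leaves the entry block as
//   %0 = ARGUMENT_I32 0
//   %2 = CONST_I32 1
//   %1 = ARGUMENT_I32 1
// the pass below moves the trailing ARGUMENT_I32 back up so that every
// ARGUMENT instruction again precedes the first non-ARGUMENT instruction.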
+/// +/// The ARGUMENTS physical register keeps these instructions pinned in place +/// during liveness-aware CodeGen passes, however one thing which does not +/// respect this is the ScheduleDAG scheduler. This pass is therefore run +/// immediately after that. +/// +/// This is all hopefully a temporary solution until we find a better solution +/// for describing the live-in nature of arguments. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-argument-move" + +namespace { +class WebAssemblyArgumentMove final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyArgumentMove() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { + return "WebAssembly Argument Move"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblyArgumentMove::ID = 0; +FunctionPass *llvm::createWebAssemblyArgumentMove() { + return new WebAssemblyArgumentMove(); +} + +/// Test whether the given instruction is an ARGUMENT. +static bool IsArgument(const MachineInstr *MI) { + switch (MI->getOpcode()) { + case WebAssembly::ARGUMENT_I32: + case WebAssembly::ARGUMENT_I64: + case WebAssembly::ARGUMENT_F32: + case WebAssembly::ARGUMENT_F64: + return true; + default: + return false; + } +} + +bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Argument Move **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + bool Changed = false; + MachineBasicBlock &EntryMBB = MF.front(); + MachineBasicBlock::iterator InsertPt = EntryMBB.end(); + + // Look for the first NonArg instruction. + for (auto MII = EntryMBB.begin(), MIE = EntryMBB.end(); MII != MIE; ++MII) { + MachineInstr *MI = MII; + if (!IsArgument(MI)) { + InsertPt = MII; + break; + } + } + + // Now move any argument instructions later in the block + // to before our first NonArg instruction. + for (auto I = InsertPt, E = EntryMBB.end(); I != E; ++I) { + MachineInstr *MI = I; + if (IsArgument(MI)) { + EntryMBB.insert(InsertPt, MI->removeFromParent()); + Changed = true; + } + } + + return Changed; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp new file mode 100644 index 000000000000..0d2b4d9debb9 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -0,0 +1,285 @@ +//===-- WebAssemblyAsmPrinter.cpp - WebAssembly LLVM assembly writer ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains a printer that converts from our internal +/// representation of machine-dependent LLVM code to the WebAssembly assembly +/// language. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "InstPrinter/WebAssemblyInstPrinter.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMCInstLower.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblyRegisterInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +namespace { + +class WebAssemblyAsmPrinter final : public AsmPrinter { + const MachineRegisterInfo *MRI; + const WebAssemblyFunctionInfo *MFI; + +public: + WebAssemblyAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)), MRI(nullptr), MFI(nullptr) {} + +private: + const char *getPassName() const override { + return "WebAssembly Assembly Printer"; + } + + //===------------------------------------------------------------------===// + // MachineFunctionPass Implementation. + //===------------------------------------------------------------------===// + + bool runOnMachineFunction(MachineFunction &MF) override { + MRI = &MF.getRegInfo(); + MFI = MF.getInfo<WebAssemblyFunctionInfo>(); + return AsmPrinter::runOnMachineFunction(MF); + } + + //===------------------------------------------------------------------===// + // AsmPrinter Implementation. + //===------------------------------------------------------------------===// + + void EmitJumpTableInfo() override; + void EmitConstantPool() override; + void EmitFunctionBodyStart() override; + void EmitInstruction(const MachineInstr *MI) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + + MVT getRegType(unsigned RegNo) const; + const char *toString(MVT VT) const; + std::string regToString(const MachineOperand &MO); +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Helpers. +//===----------------------------------------------------------------------===// + +MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { + const TargetRegisterClass *TRC = + TargetRegisterInfo::isVirtualRegister(RegNo) ? 
+ MRI->getRegClass(RegNo) : + MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo); + for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) + if (TRC->hasType(T)) + return T; + DEBUG(errs() << "Unknown type for register number: " << RegNo); + llvm_unreachable("Unknown register type"); + return MVT::Other; +} + +std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { + unsigned RegNo = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(RegNo) && + "Unlowered physical register encountered during assembly printing"); + assert(!MFI->isVRegStackified(RegNo)); + unsigned WAReg = MFI->getWAReg(RegNo); + assert(WAReg != WebAssemblyFunctionInfo::UnusedReg); + return '$' + utostr(WAReg); +} + +const char *WebAssemblyAsmPrinter::toString(MVT VT) const { + return WebAssembly::TypeToString(VT); +} + +//===----------------------------------------------------------------------===// +// WebAssemblyAsmPrinter Implementation. +//===----------------------------------------------------------------------===// + +void WebAssemblyAsmPrinter::EmitConstantPool() { + assert(MF->getConstantPool()->getConstants().empty() && + "WebAssembly disables constant pools"); +} + +void WebAssemblyAsmPrinter::EmitJumpTableInfo() { + // Nothing to do; jump tables are incorporated into the instruction stream. +} + +static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, + Type *Ty, SmallVectorImpl<MVT> &ValueVTs) { + const DataLayout &DL(F.getParent()->getDataLayout()); + const WebAssemblyTargetLowering &TLI = + *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering(); + SmallVector<EVT, 4> VTs; + ComputeValueVTs(TLI, DL, Ty, VTs); + + for (EVT VT : VTs) { + unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT); + MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) + ValueVTs.push_back(RegisterVT); + } +} + +void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { + if (!MFI->getParams().empty()) { + MCInst Param; + Param.setOpcode(WebAssembly::PARAM); + for (MVT VT : MFI->getParams()) + Param.addOperand(MCOperand::createImm(VT.SimpleTy)); + EmitToStreamer(*OutStreamer, Param); + } + + SmallVector<MVT, 4> ResultVTs; + const Function &F(*MF->getFunction()); + ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs); + // If the return type needs to be legalized it will get converted into + // passing a pointer. + if (ResultVTs.size() == 1) { + MCInst Result; + Result.setOpcode(WebAssembly::RESULT); + Result.addOperand(MCOperand::createImm(ResultVTs.front().SimpleTy)); + EmitToStreamer(*OutStreamer, Result); + } + + bool AnyWARegs = false; + MCInst Local; + Local.setOpcode(WebAssembly::LOCAL); + for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { + unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx); + unsigned WAReg = MFI->getWAReg(VReg); + // Don't declare unused registers. + if (WAReg == WebAssemblyFunctionInfo::UnusedReg) + continue; + // Don't redeclare parameters. + if (WAReg < MFI->getParams().size()) + continue; + // Don't declare stackified registers. 
+ if (int(WAReg) < 0) + continue; + Local.addOperand(MCOperand::createImm(getRegType(VReg).SimpleTy)); + AnyWARegs = true; + } + auto &PhysRegs = MFI->getPhysRegs(); + for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) { + if (PhysRegs[PReg] == -1U) + continue; + Local.addOperand(MCOperand::createImm(getRegType(PReg).SimpleTy)); + AnyWARegs = true; + } + if (AnyWARegs) + EmitToStreamer(*OutStreamer, Local); + + AsmPrinter::EmitFunctionBodyStart(); +} + +void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { + DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n'); + + switch (MI->getOpcode()) { + case WebAssembly::ARGUMENT_I32: + case WebAssembly::ARGUMENT_I64: + case WebAssembly::ARGUMENT_F32: + case WebAssembly::ARGUMENT_F64: + // These represent values which are live into the function entry, so there's + // no instruction to emit. + break; + case WebAssembly::LOOP_END: + // This is a no-op which just exists to tell AsmPrinter.cpp that there's a + // fallthrough which nevertheless requires a label for the destination here. + break; + default: { + WebAssemblyMCInstLower MCInstLowering(OutContext, *this); + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + EmitToStreamer(*OutStreamer, TmpInst); + break; + } + } +} + +bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + if (AsmVariant != 0) + report_fatal_error("There are no defined alternate asm variants"); + + // First try the generic code, which knows about modifiers like 'c' and 'n'. + if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS)) + return false; + + if (!ExtraCode) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + OS << MO.getImm(); + return false; + case MachineOperand::MO_Register: + OS << regToString(MO); + return false; + case MachineOperand::MO_GlobalAddress: + getSymbol(MO.getGlobal())->print(OS, MAI); + printOffset(MO.getOffset(), OS); + return false; + case MachineOperand::MO_ExternalSymbol: + GetExternalSymbolSymbol(MO.getSymbolName())->print(OS, MAI); + printOffset(MO.getOffset(), OS); + return false; + case MachineOperand::MO_MachineBasicBlock: + MO.getMBB()->getSymbol()->print(OS, MAI); + return false; + default: + break; + } + } + + return true; +} + +bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + if (AsmVariant != 0) + report_fatal_error("There are no defined alternate asm variants"); + + if (!ExtraCode) { + // TODO: For now, we just hard-code 0 as the constant offset; teach + // SelectInlineAsmMemoryOperand how to do address mode matching. + OS << "0(" + regToString(MI->getOperand(OpNo)) + ')'; + return false; + } + + return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS); +} + +// Force static initialization. 
+extern "C" void LLVMInitializeWebAssemblyAsmPrinter() { + RegisterAsmPrinter<WebAssemblyAsmPrinter> X(TheWebAssemblyTarget32); + RegisterAsmPrinter<WebAssemblyAsmPrinter> Y(TheWebAssemblyTarget64); +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp new file mode 100644 index 000000000000..e9671ee07e69 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -0,0 +1,468 @@ +//===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a CFG stacking pass. +/// +/// This pass reorders the blocks in a function to put them into a reverse +/// post-order [0], with special care to keep the order as similar as possible +/// to the original order, and to keep loops contiguous even in the case of +/// split backedges. +/// +/// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since +/// scope boundaries serve as the labels for WebAssembly's control transfers. +/// +/// This is sufficient to convert arbitrary CFGs into a form that works on +/// WebAssembly, provided that all loops are single-entry. +/// +/// [0] https://en.wikipedia.org/wiki/Depth-first_search#Vertex_orderings +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblySubtarget.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-cfg-stackify" + +namespace { +class WebAssemblyCFGStackify final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly CFG Stackify"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyCFGStackify() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyCFGStackify::ID = 0; +FunctionPass *llvm::createWebAssemblyCFGStackify() { + return new WebAssemblyCFGStackify(); +} + +static void EliminateMultipleEntryLoops(MachineFunction &MF, + const MachineLoopInfo &MLI) { + SmallPtrSet<MachineBasicBlock *, 8> InSet; + for (scc_iterator<MachineFunction *> I = scc_begin(&MF), E = scc_end(&MF); + I != E; ++I) { + const std::vector<MachineBasicBlock *> &CurrentSCC = *I; + + // Skip trivial SCCs. 
+ if (CurrentSCC.size() == 1) + continue; + + InSet.insert(CurrentSCC.begin(), CurrentSCC.end()); + MachineBasicBlock *Header = nullptr; + for (MachineBasicBlock *MBB : CurrentSCC) { + for (MachineBasicBlock *Pred : MBB->predecessors()) { + if (InSet.count(Pred)) + continue; + if (!Header) { + Header = MBB; + break; + } + // TODO: Implement multiple-entry loops. + report_fatal_error("multiple-entry loops are not supported yet"); + } + } + assert(MLI.isLoopHeader(Header)); + + InSet.clear(); + } +} + +namespace { +/// Post-order traversal stack entry. +struct POStackEntry { + MachineBasicBlock *MBB; + SmallVector<MachineBasicBlock *, 0> Succs; + + POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, + const MachineLoopInfo &MLI); +}; +} // end anonymous namespace + +static bool LoopContains(const MachineLoop *Loop, + const MachineBasicBlock *MBB) { + return Loop ? Loop->contains(MBB) : true; +} + +POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, + const MachineLoopInfo &MLI) + : MBB(MBB), Succs(MBB->successors()) { + // RPO is not a unique form, since at every basic block with multiple + // successors, the DFS has to pick which order to visit the successors in. + // Sort them strategically (see below). + MachineLoop *Loop = MLI.getLoopFor(MBB); + MachineFunction::iterator Next = next(MachineFunction::iterator(MBB)); + MachineBasicBlock *LayoutSucc = Next == MF.end() ? nullptr : &*Next; + std::stable_sort( + Succs.begin(), Succs.end(), + [=, &MLI](const MachineBasicBlock *A, const MachineBasicBlock *B) { + if (A == B) + return false; + + // Keep loops contiguous by preferring the block that's in the same + // loop. + bool LoopContainsA = LoopContains(Loop, A); + bool LoopContainsB = LoopContains(Loop, B); + if (LoopContainsA && !LoopContainsB) + return true; + if (!LoopContainsA && LoopContainsB) + return false; + + // Minimize perturbation by preferring the block which is the immediate + // layout successor. + if (A == LayoutSucc) + return true; + if (B == LayoutSucc) + return false; + + // TODO: More sophisticated orderings may be profitable here. + + return false; + }); +} + +/// Return the "bottom" block of a loop. This differs from +/// MachineLoop::getBottomBlock in that it works even if the loop is +/// discontiguous. +static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) { + MachineBasicBlock *Bottom = Loop->getHeader(); + for (MachineBasicBlock *MBB : Loop->blocks()) + if (MBB->getNumber() > Bottom->getNumber()) + Bottom = MBB; + return Bottom; +} + +/// Sort the blocks in RPO, taking special care to make sure that loops are +/// contiguous even in the case of split backedges. +/// +/// TODO: Determine whether RPO is actually worthwhile, or whether we should +/// move to just a stable-topological-sort-based approach that would preserve +/// more of the original order. +static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { + // Note that we do our own RPO rather than using + // "llvm/ADT/PostOrderIterator.h" because we want control over the order that + // successors are visited in (see above). Also, we can sort the blocks in the + // MachineFunction as we go. 
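// Illustrative example with hypothetical blocks: given Entry -> {Hdr, Exit},
// Hdr -> {Body, Exit}, and the backedge Body -> Hdr, the comparator above
// ranks Body ahead of Exit among Hdr's successors because Body is in the same
// loop as Hdr. The intended final order is therefore Entry, Hdr, Body, Exit;
// Entry, Hdr, Exit, Body would also be a valid RPO, but it would split the
// loop.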
+ SmallPtrSet<MachineBasicBlock *, 16> Visited; + SmallVector<POStackEntry, 16> Stack; + + MachineBasicBlock *EntryBlock = &*MF.begin(); + Visited.insert(EntryBlock); + Stack.push_back(POStackEntry(EntryBlock, MF, MLI)); + + for (;;) { + POStackEntry &Entry = Stack.back(); + SmallVectorImpl<MachineBasicBlock *> &Succs = Entry.Succs; + if (!Succs.empty()) { + MachineBasicBlock *Succ = Succs.pop_back_val(); + if (Visited.insert(Succ).second) + Stack.push_back(POStackEntry(Succ, MF, MLI)); + continue; + } + + // Put the block in its position in the MachineFunction. + MachineBasicBlock &MBB = *Entry.MBB; + MBB.moveBefore(&*MF.begin()); + + // Branch instructions may utilize a fallthrough, so update them if a + // fallthrough has been added or removed. + if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() && + !MBB.back().isBarrier()) + report_fatal_error( + "Non-branch terminator with fallthrough cannot yet be rewritten"); + if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch()) + MBB.updateTerminator(); + + Stack.pop_back(); + if (Stack.empty()) + break; + } + + // Now that we've sorted the blocks in RPO, renumber them. + MF.RenumberBlocks(); + +#ifndef NDEBUG + SmallSetVector<MachineLoop *, 8> OnStack; + + // Insert a sentinel representing the degenerate loop that starts at the + // function entry block and includes the entire function as a "loop" that + // executes once. + OnStack.insert(nullptr); + + for (auto &MBB : MF) { + assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative."); + + MachineLoop *Loop = MLI.getLoopFor(&MBB); + if (Loop && &MBB == Loop->getHeader()) { + // Loop header. The loop predecessor should be sorted above, and the other + // predecessors should be backedges below. + for (auto Pred : MBB.predecessors()) + assert( + (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) && + "Loop header predecessors must be loop predecessors or backedges"); + assert(OnStack.insert(Loop) && "Loops should be declared at most once."); + } else { + // Not a loop header. All predecessors should be sorted above. + for (auto Pred : MBB.predecessors()) + assert(Pred->getNumber() < MBB.getNumber() && + "Non-loop-header predecessors should be topologically sorted"); + assert(OnStack.count(MLI.getLoopFor(&MBB)) && + "Blocks must be nested in their loops"); + } + while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back())) + OnStack.pop_back(); + } + assert(OnStack.pop_back_val() == nullptr && + "The function entry block shouldn't actually be a loop header"); + assert(OnStack.empty() && + "Control flow stack pushes and pops should be balanced."); +#endif +} + +/// Test whether Pred has any terminators explicitly branching to MBB, as +/// opposed to falling through. Note that it's possible (eg. in unoptimized +/// code) for a branch instruction to both branch to a block and fallthrough +/// to it, so we check the actual branch operands to see if there are any +/// explicit mentions. +static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, MachineBasicBlock *MBB) { + for (MachineInstr &MI : Pred->terminators()) + for (MachineOperand &MO : MI.explicit_operands()) + if (MO.isMBB() && MO.getMBB() == MBB) + return true; + return false; +} + +/// Insert a BLOCK marker for branches to MBB (if needed). 
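// Rough sketch of the intended result (block names are illustrative): for a
// forward branch that skips over B,
//   A:  BLOCK C          ; opens the scope whose end is C
//       br_if ..., C     ; the forward branch targets the end of that scope
//   B:  ...
//   C:  ...              ; scope ends here, providing the branch's label
// The BLOCK is hoisted to the nearest common dominator of C's forward,
// non-fallthrough predecessors (computed below) so the scope stays as short
// as possible.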
+static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, + SmallVectorImpl<MachineBasicBlock *> &ScopeTops, + const WebAssemblyInstrInfo &TII, + const MachineLoopInfo &MLI, + MachineDominatorTree &MDT) { + // First compute the nearest common dominator of all forward non-fallthrough + // predecessors so that we minimize the time that the BLOCK is on the stack, + // which reduces overall stack height. + MachineBasicBlock *Header = nullptr; + bool IsBranchedTo = false; + int MBBNumber = MBB.getNumber(); + for (MachineBasicBlock *Pred : MBB.predecessors()) + if (Pred->getNumber() < MBBNumber) { + Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred; + if (ExplicitlyBranchesTo(Pred, &MBB)) + IsBranchedTo = true; + } + if (!Header) + return; + if (!IsBranchedTo) + return; + + assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors"); + MachineBasicBlock *LayoutPred = &*prev(MachineFunction::iterator(&MBB)); + + // If the nearest common dominator is inside a more deeply nested context, + // walk out to the nearest scope which isn't more deeply nested. + for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) { + if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) { + if (ScopeTop->getNumber() > Header->getNumber()) { + // Skip over an intervening scope. + I = next(MachineFunction::iterator(ScopeTop)); + } else { + // We found a scope level at an appropriate depth. + Header = ScopeTop; + break; + } + } + } + + // If there's a loop which ends just before MBB which contains Header, we can + // reuse its label instead of inserting a new BLOCK. + for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred); + Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop()) + if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header)) + return; + + // Decide where in Header to put the BLOCK. + MachineBasicBlock::iterator InsertPos; + MachineLoop *HeaderLoop = MLI.getLoopFor(Header); + if (HeaderLoop && MBB.getNumber() > LoopBottom(HeaderLoop)->getNumber()) { + // Header is the header of a loop that does not lexically contain MBB, so + // the BLOCK needs to be above the LOOP. + InsertPos = Header->begin(); + } else { + // Otherwise, insert the BLOCK as late in Header as we can, but before the + // beginning of the local expression tree and any nested BLOCKs. + InsertPos = Header->getFirstTerminator(); + while (InsertPos != Header->begin() && + prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) && + prev(InsertPos)->getOpcode() != WebAssembly::LOOP) + --InsertPos; + } + + // Add the BLOCK. + BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)) + .addMBB(&MBB); + + // Track the farthest-spanning scope that ends at this point. + int Number = MBB.getNumber(); + if (!ScopeTops[Number] || + ScopeTops[Number]->getNumber() > Header->getNumber()) + ScopeTops[Number] = Header; +} + +/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header). +static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF, + SmallVectorImpl<MachineBasicBlock *> &ScopeTops, + const WebAssemblyInstrInfo &TII, + const MachineLoopInfo &MLI) { + MachineLoop *Loop = MLI.getLoopFor(&MBB); + if (!Loop || Loop->getHeader() != &MBB) + return; + + // The operand of a LOOP is the first block after the loop. If the loop is the + // bottom of the function, insert a dummy block at the end. 
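// Rough sketch with illustrative labels:
//   header:  LOOP after          ; the operand names the first block after the loop
//            ...
//   bottom:  br_if ..., header   ; conditional backedge, not a barrier
//            LOOP_END            ; no-op added below so AsmPrinter still emits a label
//   after:   ...
// When the loop is the bottom of the function, the dummy block created below
// stands in for 'after'.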
+ MachineBasicBlock *Bottom = LoopBottom(Loop); + auto Iter = next(MachineFunction::iterator(Bottom)); + if (Iter == MF.end()) { + MachineBasicBlock *Label = MF.CreateMachineBasicBlock(); + // Give it a fake predecessor so that AsmPrinter prints its label. + Label->addSuccessor(Label); + MF.push_back(Label); + Iter = next(MachineFunction::iterator(Bottom)); + } + MachineBasicBlock *AfterLoop = &*Iter; + BuildMI(MBB, MBB.begin(), DebugLoc(), TII.get(WebAssembly::LOOP)) + .addMBB(AfterLoop); + + // Emit a special no-op telling the asm printer that we need a label to close + // the loop scope, even though the destination is only reachable by + // fallthrough. + if (!Bottom->back().isBarrier()) + BuildMI(*Bottom, Bottom->end(), DebugLoc(), TII.get(WebAssembly::LOOP_END)); + + assert((!ScopeTops[AfterLoop->getNumber()] || + ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) && + "With RPO we should visit the outer-most loop for a block first."); + if (!ScopeTops[AfterLoop->getNumber()]) + ScopeTops[AfterLoop->getNumber()] = &MBB; +} + +/// Insert LOOP and BLOCK markers at appropriate places. +static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, + const WebAssemblyInstrInfo &TII, + MachineDominatorTree &MDT) { + // For each block whose label represents the end of a scope, record the block + // which holds the beginning of the scope. This will allow us to quickly skip + // over scoped regions when walking blocks. We allocate one more than the + // number of blocks in the function to accommodate for the possible fake block + // we may insert at the end. + SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1); + + for (auto &MBB : MF) { + // Place the LOOP for MBB if MBB is the header of a loop. + PlaceLoopMarker(MBB, MF, ScopeTops, TII, MLI); + + // Place the BLOCK for MBB if MBB is branched to from above. + PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT); + } +} + +#ifndef NDEBUG +static bool +IsOnStack(const SmallVectorImpl<std::pair<MachineBasicBlock *, bool>> &Stack, + const MachineBasicBlock *MBB) { + for (const auto &Pair : Stack) + if (Pair.first == MBB) + return true; + return false; +} +#endif + +bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** CFG Stackifying **********\n" + "********** Function: " + << MF.getName() << '\n'); + + const auto &MLI = getAnalysis<MachineLoopInfo>(); + auto &MDT = getAnalysis<MachineDominatorTree>(); + const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + + // RPO sorting needs all loops to be single-entry. + EliminateMultipleEntryLoops(MF, MLI); + + // Sort the blocks in RPO, with contiguous loops. + SortBlocks(MF, MLI); + + // Place the BLOCK and LOOP markers to indicate the beginnings of scopes. + PlaceMarkers(MF, MLI, TII, MDT); + +#ifndef NDEBUG + // Verify that block and loop beginnings and endings are in LIFO order, and + // that all references to blocks are to blocks on the stack at the point of + // the reference. 
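// Illustrative reading of the check: "BLOCK X ... LOOP ... loop end ... X:" is
// properly nested, since the inner loop scope closes before the enclosing
// block scope does; scope ends that cross (the block's end label appearing
// while the loop scope is still open) would trip the assertions below.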
+ SmallVector<std::pair<MachineBasicBlock *, bool>, 0> Stack; + for (auto &MBB : MF) { + while (!Stack.empty() && Stack.back().first == &MBB) + if (Stack.back().second) { + assert(Stack.size() >= 2); + Stack.pop_back(); + Stack.pop_back(); + } else { + assert(Stack.size() >= 1); + Stack.pop_back(); + } + for (auto &MI : MBB) + switch (MI.getOpcode()) { + case WebAssembly::LOOP: + Stack.push_back(std::make_pair(&MBB, false)); + Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), true)); + break; + case WebAssembly::BLOCK: + Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), false)); + break; + default: + // Verify that all referenced blocks are in scope. A reference to a + // block with a negative number is invalid, but can happen with inline + // asm, so we shouldn't assert on it, but instead let CodeGen properly + // fail on it. + for (const MachineOperand &MO : MI.explicit_operands()) + if (MO.isMBB() && MO.getMBB()->getNumber() >= 0) + assert(IsOnStack(Stack, MO.getMBB())); + break; + } + } + assert(Stack.empty()); +#endif + + return true; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp new file mode 100644 index 000000000000..1b761b1a9d73 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -0,0 +1,81 @@ +//===-- WebAssemblyFastISel.cpp - WebAssembly FastISel implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file defines the WebAssembly-specific support for the FastISel +/// class. Some of the target-specific code is generated by tablegen in the file +/// WebAssemblyGenFastISel.inc, which is #included here. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyTargetMachine.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-fastisel" + +namespace { + +class WebAssemblyFastISel final : public FastISel { + /// Keep a pointer to the WebAssemblySubtarget around so that we can make the + /// right decision when generating code for different targets. + const WebAssemblySubtarget *Subtarget; + LLVMContext *Context; + + // Call handling routines. +private: +public: + // Backend specific FastISel code. 
+ WebAssemblyFastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo) + : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { + Subtarget = &FuncInfo.MF->getSubtarget<WebAssemblySubtarget>(); + Context = &FuncInfo.Fn->getContext(); + } + + bool fastSelectInstruction(const Instruction *I) override; + +#include "WebAssemblyGenFastISel.inc" +}; + +} // end anonymous namespace + +bool WebAssemblyFastISel::fastSelectInstruction(const Instruction *I) { + switch (I->getOpcode()) { + default: + break; + // TODO: add fast-isel selection cases here... + } + + // Fall back to target-independent instruction selection. + return selectOperator(I, I->getOpcode()); +} + +FastISel *WebAssembly::createFastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo) { + return new WebAssemblyFastISel(FuncInfo, LibInfo); +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index e4ca82e963c2..0eefd57f1f2c 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -35,11 +35,20 @@ using namespace llvm; #define DEBUG_TYPE "wasm-frame-info" // TODO: Implement a red zone? +// TODO: wasm64 +// TODO: Prolog/epilog should be stackified too. This pass runs after register +// stackification, so we'll have to do it manually. +// TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions /// Return true if the specified function should have a dedicated frame pointer /// register. bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const { - llvm_unreachable("TODO: implement hasFP"); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const auto *RegInfo = + MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); + return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || + MFI->hasStackMap() || MFI->hasPatchPoint() || + RegInfo->needsStackRealignment(MF); } /// Under normal circumstances, when a frame pointer is not required, we reserve @@ -52,23 +61,115 @@ bool WebAssemblyFrameLowering::hasReservedCallFrame( return !MF.getFrameInfo()->hasVarSizedObjects(); } + +/// Adjust the stack pointer by a constant amount. +static void adjustStackPointer(unsigned StackSize, + bool AdjustUp, + MachineFunction& MF, + MachineBasicBlock& MBB, + const TargetInstrInfo* TII, + MachineBasicBlock::iterator InsertPt, + const DebugLoc& DL) { + auto &MRI = MF.getRegInfo(); + unsigned SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer"); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPReg) + .addExternalSymbol(SPSymbol); + // This MachinePointerInfo should reference __stack_pointer as well but + // doesn't because MachinePointerInfo() takes a GV which we don't have for + // __stack_pointer. TODO: check if PseudoSourceValue::ExternalSymbolCallEntry + // is appropriate instead. (likewise for EmitEpologue below) + auto *LoadMMO = new MachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOLoad, 4, 4); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg) + .addImm(0) + .addReg(SPReg) + .addMemOperand(LoadMMO); + // Add/Subtract the frame size + unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addImm(StackSize); + BuildMI(MBB, InsertPt, DL, + TII->get(AdjustUp ? 
WebAssembly::ADD_I32 : WebAssembly::SUB_I32), + WebAssembly::SP32) + .addReg(SPReg) + .addReg(OffsetReg); + // The SP32 register now has the new stacktop. Also write it back to memory. + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addExternalSymbol(SPSymbol); + auto *MMO = new MachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, 4, 4); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32) + .addImm(0) + .addReg(OffsetReg) + .addReg(WebAssembly::SP32) + .addMemOperand(MMO); +} + void WebAssemblyFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - llvm_unreachable("TODO: implement eliminateCallFramePseudoInstr"); + const auto *TII = + static_cast<const WebAssemblyInstrInfo*>(MF.getSubtarget().getInstrInfo()); + DebugLoc DL = I->getDebugLoc(); + unsigned Opc = I->getOpcode(); + bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); + unsigned Amount = I->getOperand(0).getImm(); + if (Amount) + adjustStackPointer(Amount, IsDestroy, MF, MBB, + TII, I, DL); + MBB.erase(I); } void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - llvm_unreachable("TODO: implement emitPrologue"); + // TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions + auto *MFI = MF.getFrameInfo(); + assert(MFI->getCalleeSavedInfo().empty() && + "WebAssembly should not have callee-saved registers"); + assert(!hasFP(MF) && "Functions needing frame pointers not yet supported"); + uint64_t StackSize = MFI->getStackSize(); + if (!StackSize && (!MFI->adjustsStack() || MFI->getMaxCallFrameSize() == 0)) + return; + + const auto *TII = MF.getSubtarget().getInstrInfo(); + + auto InsertPt = MBB.begin(); + DebugLoc DL; + + adjustStackPointer(StackSize, false, MF, MBB, TII, InsertPt, DL); } void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - llvm_unreachable("TODO: implement emitEpilogue"); -} + uint64_t StackSize = MF.getFrameInfo()->getStackSize(); + if (!StackSize) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + auto InsertPt = MBB.getFirstTerminator(); + DebugLoc DL; + + if (InsertPt != MBB.end()) { + DL = InsertPt->getDebugLoc(); + } -void WebAssemblyFrameLowering::processFunctionBeforeCalleeSavedScan( - MachineFunction &MF, RegScavenger *RS) const { - llvm_unreachable("TODO: implement processFunctionBeforeCalleeSavedScan"); + // Restore the stack pointer. 
Without FP its value is just SP32 - stacksize + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addImm(StackSize); + auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer"); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), WebAssembly::SP32) + .addReg(WebAssembly::SP32) + .addReg(OffsetReg); + // Re-use OffsetReg to hold the address of the stacktop + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addExternalSymbol(SPSymbol); + auto *MMO = new MachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, 4, 4); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32) + .addImm(0) + .addReg(OffsetReg) + .addReg(WebAssembly::SP32) + .addMemOperand(MMO); } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h index 0b112d02c0bf..5f4708fe77ed 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -38,9 +38,6 @@ public: bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def new file mode 100644 index 000000000000..3a03fa55b220 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -0,0 +1,25 @@ +//- WebAssemblyISD.def - WebAssembly ISD ---------------------------*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file describes the various WebAssembly ISD node types. +/// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED! + +HANDLE_NODETYPE(CALL1) +HANDLE_NODETYPE(CALL0) +HANDLE_NODETYPE(RETURN) +HANDLE_NODETYPE(ARGUMENT) +HANDLE_NODETYPE(Wrapper) +HANDLE_NODETYPE(BR_IF) +HANDLE_NODETYPE(TABLESWITCH) + +// add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here... diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 518ef332a6c7..8390f797c43e 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -56,13 +56,68 @@ public: SDNode *Select(SDNode *Node) override; + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector<SDValue> &OutOps) override; + +// Include the pieces autogenerated from the target description. +#include "WebAssemblyGenDAGISel.inc" + private: // add select functions here... }; } // end anonymous namespace SDNode *WebAssemblyDAGToDAGISel::Select(SDNode *Node) { - llvm_unreachable("TODO: implement Select"); + // Dump information about the Node being selected. + DEBUG(errs() << "Selecting: "); + DEBUG(Node->dump(CurDAG)); + DEBUG(errs() << "\n"); + + // If we have a custom node, we already have selected! 
+ if (Node->isMachineOpcode()) { + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + Node->setNodeId(-1); + return nullptr; + } + + // Few custom selection stuff. + SDNode *ResNode = nullptr; + EVT VT = Node->getValueType(0); + + switch (Node->getOpcode()) { + default: + break; + // If we need WebAssembly-specific selection, it would go here. + (void)VT; + } + + // Select the default instruction. + ResNode = SelectCode(Node); + + DEBUG(errs() << "=> "); + if (ResNode == nullptr || ResNode == Node) + DEBUG(Node->dump(CurDAG)); + else + DEBUG(ResNode->dump(CurDAG)); + DEBUG(errs() << "\n"); + + return ResNode; +} + +bool WebAssemblyDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { + switch (ConstraintID) { + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: + // We just support simple memory operands that just have a single address + // operand and need no special handling. + OutOps.push_back(Op); + return false; + default: + break; + } + + return true; } /// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 4184eb6dc5a6..7a89f788c1ad 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -17,10 +17,13 @@ #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" -#include "WebAssemblyTargetObjectFile.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" @@ -32,14 +35,254 @@ using namespace llvm; #define DEBUG_TYPE "wasm-lower" +namespace { +// Diagnostic information for unimplemented or unsupported feature reporting. +// TODO: This code is copied from BPF and AMDGPU; consider factoring it out +// and sharing code. +class DiagnosticInfoUnsupported final : public DiagnosticInfo { +private: + // Debug location where this diagnostic is triggered. 
+ DebugLoc DLoc; + const Twine &Description; + const Function &Fn; + SDValue Value; + + static int KindID; + + static int getKindID() { + if (KindID == 0) + KindID = llvm::getNextAvailablePluginDiagnosticKind(); + return KindID; + } + +public: + DiagnosticInfoUnsupported(SDLoc DLoc, const Function &Fn, const Twine &Desc, + SDValue Value) + : DiagnosticInfo(getKindID(), DS_Error), DLoc(DLoc.getDebugLoc()), + Description(Desc), Fn(Fn), Value(Value) {} + + void print(DiagnosticPrinter &DP) const override { + std::string Str; + raw_string_ostream OS(Str); + + if (DLoc) { + auto DIL = DLoc.get(); + StringRef Filename = DIL->getFilename(); + unsigned Line = DIL->getLine(); + unsigned Column = DIL->getColumn(); + OS << Filename << ':' << Line << ':' << Column << ' '; + } + + OS << "in function " << Fn.getName() << ' ' << *Fn.getFunctionType() << '\n' + << Description; + if (Value) + Value->print(OS); + OS << '\n'; + OS.flush(); + DP << Str; + } + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == getKindID(); + } +}; + +int DiagnosticInfoUnsupported::KindID = 0; +} // end anonymous namespace + WebAssemblyTargetLowering::WebAssemblyTargetLowering( const TargetMachine &TM, const WebAssemblySubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { + auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32; + + // Booleans always contain 0 or 1. + setBooleanContents(ZeroOrOneBooleanContent); // WebAssembly does not produce floating-point exceptions on normal floating // point operations. setHasFloatingPointExceptions(false); // We don't know the microarchitecture here, so just reduce register pressure. setSchedulingPreference(Sched::RegPressure); + // Tell ISel that we have a stack pointer. + setStackPointerRegisterToSaveRestore( + Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32); + // Set up the register classes. + addRegisterClass(MVT::i32, &WebAssembly::I32RegClass); + addRegisterClass(MVT::i64, &WebAssembly::I64RegClass); + addRegisterClass(MVT::f32, &WebAssembly::F32RegClass); + addRegisterClass(MVT::f64, &WebAssembly::F64RegClass); + // Compute derived properties from the register classes. + computeRegisterProperties(Subtarget->getRegisterInfo()); + + setOperationAction(ISD::GlobalAddress, MVTPtr, Custom); + setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom); + setOperationAction(ISD::JumpTable, MVTPtr, Custom); + + // Take the default expansion for va_arg, va_copy, and va_end. There is no + // default action for va_start, so we do that custom. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + + for (auto T : {MVT::f32, MVT::f64}) { + // Don't expand the floating-point types to constant pools. + setOperationAction(ISD::ConstantFP, T, Legal); + // Expand floating-point comparisons. + for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE, + ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE}) + setCondCodeAction(CC, T, Expand); + // Expand floating-point library function operators. + for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW, + ISD::FREM, ISD::FMA}) + setOperationAction(Op, T, Expand); + // Note supported floating-point library function operators that otherwise + // default to expand. 
+ for (auto Op : + {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT}) + setOperationAction(Op, T, Legal); + // Support minnan and maxnan, which otherwise default to expand. + setOperationAction(ISD::FMINNAN, T, Legal); + setOperationAction(ISD::FMAXNAN, T, Legal); + } + + for (auto T : {MVT::i32, MVT::i64}) { + // Expand unavailable integer operations. + for (auto Op : + {ISD::BSWAP, ISD::ROTL, ISD::ROTR, ISD::SMUL_LOHI, ISD::UMUL_LOHI, + ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, + ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, + ISD::SUBE}) { + setOperationAction(Op, T, Expand); + } + } + + // As a special case, these operators use the type to mean the type to + // sign-extend from. + for (auto T : {MVT::i1, MVT::i8, MVT::i16, MVT::i32}) + setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand); + + // Dynamic stack allocation: use the default expansion. + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand); + + setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + + // Expand these forms; we pattern-match the forms that we can handle in isel. + for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) + for (auto Op : {ISD::BR_CC, ISD::SELECT_CC}) + setOperationAction(Op, T, Expand); + + // We have custom switch handling. + setOperationAction(ISD::BR_JT, MVT::Other, Custom); + + // WebAssembly doesn't have: + // - Floating-point extending loads. + // - Floating-point truncating stores. + // - i1 extending loads. + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + for (auto T : MVT::integer_valuetypes()) + for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) + setLoadExtAction(Ext, T, MVT::i1, Promote); + + // Trap lowers to wasm unreachable + setOperationAction(ISD::TRAP, MVT::Other, Legal); +} + +FastISel *WebAssemblyTargetLowering::createFastISel( + FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { + return WebAssembly::createFastISel(FuncInfo, LibInfo); +} + +bool WebAssemblyTargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode * /*GA*/) const { + // All offsets can be folded. + return true; +} + +MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/, + EVT VT) const { + unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1); + if (BitWidth > 1 && BitWidth < 8) + BitWidth = 8; + + if (BitWidth > 64) { + BitWidth = 64; + assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) && + "64-bit shift counts ought to be enough for anyone"); + } + + MVT Result = MVT::getIntegerVT(BitWidth); + assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE && + "Unable to represent scalar shift amount type"); + return Result; +} + +const char * +WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) { + case WebAssemblyISD::FIRST_NUMBER: + break; +#define HANDLE_NODETYPE(NODE) \ + case WebAssemblyISD::NODE: \ + return "WebAssemblyISD::" #NODE; +#include "WebAssemblyISD.def" +#undef HANDLE_NODETYPE + } + return nullptr; +} + +std::pair<unsigned, const TargetRegisterClass *> +WebAssemblyTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + // First, see if this is a constraint that directly corresponds to a + // WebAssembly register class. 
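// For example (illustrative source): inline asm such as
//   asm("..." : "=r"(x) : "r"(y));
// with 32-bit integer operands maps the 'r' constraint to I32RegClass below,
// 64-bit integer operands map to I64RegClass, and everything else falls back
// to the default TargetLowering handling.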
+ if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + assert(VT != MVT::iPTR && "Pointer MVT not expected here"); + if (VT.isInteger() && !VT.isVector()) { + if (VT.getSizeInBits() <= 32) + return std::make_pair(0U, &WebAssembly::I32RegClass); + if (VT.getSizeInBits() <= 64) + return std::make_pair(0U, &WebAssembly::I64RegClass); + } + break; + default: + break; + } + } + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const { + // Assume ctz is a relatively cheap operation. + return true; +} + +bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const { + // Assume clz is a relatively cheap operation. + return true; +} + +bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, + Type *Ty, + unsigned AS) const { + // WebAssembly offsets are added as unsigned without wrapping. The + // isLegalAddressingMode gives us no way to determine if wrapping could be + // happening, so we approximate this by accepting only non-negative offsets. + if (AM.BaseOffs < 0) + return false; + + // WebAssembly has no scale register operands. + if (AM.Scale != 0) + return false; + + // Everything else is legal. + return true; } //===----------------------------------------------------------------------===// @@ -50,16 +293,359 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // Lowering Code //===----------------------------------------------------------------------===// +static void fail(SDLoc DL, SelectionDAG &DAG, const char *msg) { + MachineFunction &MF = DAG.getMachineFunction(); + DAG.getContext()->diagnose( + DiagnosticInfoUnsupported(DL, *MF.getFunction(), msg, SDValue())); +} + +// Test whether the given calling convention is supported. +static bool CallingConvSupported(CallingConv::ID CallConv) { + // We currently support the language-independent target-independent + // conventions. We don't yet have a way to annotate calls with properties like + // "cold", and we don't have any call-clobbered registers, so these are mostly + // all handled the same. + return CallConv == CallingConv::C || CallConv == CallingConv::Fast || + CallConv == CallingConv::Cold || + CallConv == CallingConv::PreserveMost || + CallConv == CallingConv::PreserveAll || + CallConv == CallingConv::CXX_FAST_TLS; +} + +SDValue +WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc DL = CLI.DL; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + MachineFunction &MF = DAG.getMachineFunction(); + + CallingConv::ID CallConv = CLI.CallConv; + if (!CallingConvSupported(CallConv)) + fail(DL, DAG, + "WebAssembly doesn't support language-specific or target-specific " + "calling conventions yet"); + if (CLI.IsPatchPoint) + fail(DL, DAG, "WebAssembly doesn't support patch point yet"); + + // WebAssembly doesn't currently support explicit tail calls. If they are + // required, fail. Otherwise, just disable them. 
+ if ((CallConv == CallingConv::Fast && CLI.IsTailCall && + MF.getTarget().Options.GuaranteedTailCallOpt) || + (CLI.CS && CLI.CS->isMustTailCall())) + fail(DL, DAG, "WebAssembly doesn't support tail call yet"); + CLI.IsTailCall = false; + + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; + if (Ins.size() > 1) + fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet"); + + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + for (const ISD::OutputArg &Out : Outs) { + if (Out.Flags.isByVal()) + fail(DL, DAG, "WebAssembly hasn't implemented byval arguments"); + if (Out.Flags.isNest()) + fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); + if (Out.Flags.isInAlloca()) + fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); + if (Out.Flags.isInConsecutiveRegs()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); + if (Out.Flags.isInConsecutiveRegsLast()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); + } + + bool IsVarArg = CLI.IsVarArg; + unsigned NumFixedArgs = CLI.NumFixedArgs; + auto PtrVT = getPointerTy(MF.getDataLayout()); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + + if (IsVarArg) { + // Outgoing non-fixed arguments are placed at the top of the stack. First + // compute their offsets and the total amount of argument stack space + // needed. + for (SDValue Arg : + make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { + EVT VT = Arg.getValueType(); + assert(VT != MVT::iPTR && "Legalized args should be concrete"); + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + unsigned Offset = + CCInfo.AllocateStack(MF.getDataLayout().getTypeAllocSize(Ty), + MF.getDataLayout().getABITypeAlignment(Ty)); + CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(), + Offset, VT.getSimpleVT(), + CCValAssign::Full)); + } + } + + unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); + + SDValue NB; + if (NumBytes) { + NB = DAG.getConstant(NumBytes, DL, PtrVT, true); + Chain = DAG.getCALLSEQ_START(Chain, NB, DL); + } + + if (IsVarArg) { + // For non-fixed arguments, next emit stores to store the argument values + // to the stack at the offsets computed above. + SDValue SP = DAG.getCopyFromReg( + Chain, DL, getStackPointerRegisterToSaveRestore(), PtrVT); + unsigned ValNo = 0; + SmallVector<SDValue, 8> Chains; + for (SDValue Arg : + make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { + assert(ArgLocs[ValNo].getValNo() == ValNo && + "ArgLocs should remain in order and only hold varargs args"); + unsigned Offset = ArgLocs[ValNo++].getLocMemOffset(); + SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, SP, + DAG.getConstant(Offset, DL, PtrVT)); + Chains.push_back(DAG.getStore(Chain, DL, Arg, Add, + MachinePointerInfo::getStack(MF, Offset), + false, false, 0)); + } + if (!Chains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + } + + // Compute the operands for the CALLn node. + SmallVector<SDValue, 16> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs + // isn't reliable. + Ops.append(OutVals.begin(), + IsVarArg ? 
OutVals.begin() + NumFixedArgs : OutVals.end()); + + SmallVector<EVT, 8> Tys; + for (const auto &In : Ins) { + assert(!In.Flags.isByVal() && "byval is not valid for return values"); + assert(!In.Flags.isNest() && "nest is not valid for return values"); + if (In.Flags.isInAlloca()) + fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values"); + if (In.Flags.isInConsecutiveRegs()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values"); + if (In.Flags.isInConsecutiveRegsLast()) + fail(DL, DAG, + "WebAssembly hasn't implemented cons regs last return values"); + // Ignore In.getOrigAlign() because all our arguments are passed in + // registers. + Tys.push_back(In.VT); + } + Tys.push_back(MVT::Other); + SDVTList TyList = DAG.getVTList(Tys); + SDValue Res = + DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1, + DL, TyList, Ops); + if (Ins.empty()) { + Chain = Res; + } else { + InVals.push_back(Res); + Chain = Res.getValue(1); + } + + if (NumBytes) { + SDValue Unused = DAG.getTargetConstant(0, DL, PtrVT); + Chain = DAG.getCALLSEQ_END(Chain, NB, Unused, SDValue(), DL); + } + + return Chain; +} + +bool WebAssemblyTargetLowering::CanLowerReturn( + CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext & /*Context*/) const { + // WebAssembly can't currently handle returning tuples. + return Outs.size() <= 1; +} + +SDValue WebAssemblyTargetLowering::LowerReturn( + SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, SDLoc DL, + SelectionDAG &DAG) const { + assert(Outs.size() <= 1 && "WebAssembly can only return up to one value"); + if (!CallingConvSupported(CallConv)) + fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); + + SmallVector<SDValue, 4> RetOps(1, Chain); + RetOps.append(OutVals.begin(), OutVals.end()); + Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps); + + // Record the number and types of the return values. + for (const ISD::OutputArg &Out : Outs) { + assert(!Out.Flags.isByVal() && "byval is not valid for return values"); + assert(!Out.Flags.isNest() && "nest is not valid for return values"); + assert(Out.IsFixed && "non-fixed return value is not valid"); + if (Out.Flags.isInAlloca()) + fail(DL, DAG, "WebAssembly hasn't implemented inalloca results"); + if (Out.Flags.isInConsecutiveRegs()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs results"); + if (Out.Flags.isInConsecutiveRegsLast()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results"); + } + + return Chain; +} + +SDValue WebAssemblyTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/, + const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + + if (!CallingConvSupported(CallConv)) + fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); + + // Set up the incoming ARGUMENTS value, which serves to represent the liveness + // of the incoming values before they're represented by virtual registers. 
+ MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS); + + for (const ISD::InputArg &In : Ins) { + if (In.Flags.isByVal()) + fail(DL, DAG, "WebAssembly hasn't implemented byval arguments"); + if (In.Flags.isInAlloca()) + fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); + if (In.Flags.isNest()) + fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); + if (In.Flags.isInConsecutiveRegs()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); + if (In.Flags.isInConsecutiveRegsLast()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); + // Ignore In.getOrigAlign() because all our arguments are passed in + // registers. + InVals.push_back( + In.Used + ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT, + DAG.getTargetConstant(InVals.size(), DL, MVT::i32)) + : DAG.getUNDEF(In.VT)); + + // Record the number and types of arguments. + MF.getInfo<WebAssemblyFunctionInfo>()->addParam(In.VT); + } + + // Incoming varargs arguments are on the stack and will be accessed through + // va_arg, so we don't need to do anything for them here. + + return Chain; +} + //===----------------------------------------------------------------------===// -// Other Lowering Code +// Custom lowering hooks. //===----------------------------------------------------------------------===// +SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: + llvm_unreachable("unimplemented operation lowering"); + return SDValue(); + case ISD::FrameIndex: + return LowerFrameIndex(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::ExternalSymbol: + return LowerExternalSymbol(Op, DAG); + case ISD::JumpTable: + return LowerJumpTable(Op, DAG); + case ISD::BR_JT: + return LowerBR_JT(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + } +} + +SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op, + SelectionDAG &DAG) const { + int FI = cast<FrameIndexSDNode>(Op)->getIndex(); + return DAG.getTargetFrameIndex(FI, Op.getValueType()); +} + +SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + const auto *GA = cast<GlobalAddressSDNode>(Op); + EVT VT = Op.getValueType(); + assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); + if (GA->getAddressSpace() != 0) + fail(DL, DAG, "WebAssembly only expects the 0 address space"); + return DAG.getNode( + WebAssemblyISD::Wrapper, DL, VT, + DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset())); +} + +SDValue +WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + const auto *ES = cast<ExternalSymbolSDNode>(Op); + EVT VT = Op.getValueType(); + assert(ES->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); + return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, + DAG.getTargetExternalSymbol(ES->getSymbol(), VT)); +} + +SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op, + SelectionDAG &DAG) const { + // There's no need for a Wrapper node because we always incorporate a jump + // table operand into a TABLESWITCH instruction, rather than ever + // materializing it in a register. 
+ const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(), + JT->getTargetFlags()); +} + +SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Chain = Op.getOperand(0); + const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1)); + SDValue Index = Op.getOperand(2); + assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); + + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Index); + + MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo(); + const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs; + + // TODO: For now, we just pick something arbitrary for a default case for now. + // We really want to sniff out the guard and put in the real default case (and + // delete the guard). + Ops.push_back(DAG.getBasicBlock(MBBs[0])); + + // Add an operand for each case. + for (auto MBB : MBBs) + Ops.push_back(DAG.getBasicBlock(MBB)); + + return DAG.getNode(WebAssemblyISD::TABLESWITCH, DL, MVT::Other, Ops); +} + +SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout()); + + // The incoming non-fixed arguments are placed on the top of the stack, with + // natural alignment, at the point of the call, so the base pointer is just + // the current frame pointer. + DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true); + unsigned FP = + Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FP, PtrVT); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FrameAddr, Op.getOperand(1), + MachinePointerInfo(SV), false, false, 0); +} + //===----------------------------------------------------------------------===// // WebAssembly Optimization Hooks //===----------------------------------------------------------------------===// - -MCSection *WebAssemblyTargetObjectFile::SelectSectionForGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { - return getDataSection(); -} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index efd60a7bacd6..e7232a042e12 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -22,10 +22,11 @@ namespace llvm { namespace WebAssemblyISD { -enum { +enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, - - // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here... +#define HANDLE_NODETYPE(NODE) NODE, +#include "WebAssemblyISD.def" +#undef HANDLE_NODETYPE }; } // end namespace WebAssemblyISD @@ -42,8 +43,51 @@ private: /// Keep a pointer to the WebAssemblySubtarget around so that we can make the /// right decision when generating code for different targets. 
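The NodeType enum above and getTargetNodeName in WebAssemblyISelLowering.cpp both expand the same WebAssemblyISD.def list through the HANDLE_NODETYPE macro. The following is a minimal, self-contained sketch of that .def-style technique, an editorial illustration rather than part of this commit, using an abbreviated stand-in list rather than the real file contents:

#include <cstdio>

// Abbreviated stand-in for a WebAssemblyISD.def-style file: each entry is one
// HANDLE_NODETYPE(...) line, and the list is expanded twice with different
// definitions of HANDLE_NODETYPE. The entries here are illustrative only.
#define MY_ISD_DEF \
  HANDLE_NODETYPE(CALL0) \
  HANDLE_NODETYPE(CALL1) \
  HANDLE_NODETYPE(RETURN) \
  HANDLE_NODETYPE(WRAPPER)

// First expansion: the node enum, as in the NodeType enum above.
enum MyNodeType : unsigned {
#define HANDLE_NODETYPE(NODE) NODE,
  MY_ISD_DEF
#undef HANDLE_NODETYPE
};

// Second expansion: the opcode-to-name table, as in getTargetNodeName.
static const char *myNodeName(unsigned Opcode) {
  switch (Opcode) {
#define HANDLE_NODETYPE(NODE) case NODE: return "MyISD::" #NODE;
  MY_ISD_DEF
#undef HANDLE_NODETYPE
  }
  return nullptr;
}

int main() { std::printf("%s\n", myNodeName(CALL1)); } // prints MyISD::CALL1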
const WebAssemblySubtarget *Subtarget; + + FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo) const override; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; + const char *getTargetNodeName(unsigned Opcode) const override; + std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + bool isCheapToSpeculateCttz() const override; + bool isCheapToSpeculateCtlz() const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS) const override; + + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, SDLoc dl, + SelectionDAG &DAG) const override; + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const override; + + // Custom lowering hooks. + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; }; +namespace WebAssembly { +FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo); +} // end namespace WebAssembly + } // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td index 6b5b6cd54173..cfa1519e6d99 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -12,10 +12,63 @@ /// //===----------------------------------------------------------------------===// -/* - * TODO(jfb): Add the following. - * - * call_direct: call function directly - * call_indirect: call function indirectly - * addressof: obtain a function pointer value for a given function - */ +// TODO: addr64: These currently assume the callee address is 32-bit. + +let Defs = [ARGUMENTS] in { + +// Call sequence markers. These have an immediate which represents the amount of +// stack space to allocate or free, which is used for varargs lowering. 
+let Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1 in { +def ADJCALLSTACKDOWN : I<(outs), (ins i32imm:$amt), + [(WebAssemblycallseq_start timm:$amt)]>; +def ADJCALLSTACKUP : I<(outs), (ins i32imm:$amt, i32imm:$amt2), + [(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>; +} // isCodeGenOnly = 1 + +multiclass CALL<WebAssemblyRegClass vt, string prefix> { + def CALL_#vt : I<(outs vt:$dst), (ins i32imm:$callee, variable_ops), + [(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))], + !strconcat(prefix, "call\t$dst, $callee")>; + def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops), + [(set vt:$dst, (WebAssemblycall1 I32:$callee))], + !strconcat(prefix, "call_indirect\t$dst, $callee")>; +} +let Uses = [SP32, SP64], isCall = 1 in { + defm : CALL<I32, "i32.">; + defm : CALL<I64, "i64.">; + defm : CALL<F32, "f32.">; + defm : CALL<F64, "f64.">; + + def CALL_VOID : I<(outs), (ins i32imm:$callee, variable_ops), + [(WebAssemblycall0 (i32 imm:$callee))], + "call \t$callee">; + def CALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops), + [(WebAssemblycall0 I32:$callee)], + "call_indirect\t$callee">; +} // Uses = [SP32,SP64], isCall = 1 + +} // Defs = [ARGUMENTS] + +// Patterns for matching a direct call to a global address. +def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_I32 tglobaladdr:$callee)>; +def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_I64 tglobaladdr:$callee)>; +def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_F32 tglobaladdr:$callee)>; +def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_F64 tglobaladdr:$callee)>; +def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)), + (CALL_VOID tglobaladdr:$callee)>; + +// Patterns for matching a direct call to an external symbol. +def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_I32 texternalsym:$callee)>; +def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_I64 texternalsym:$callee)>; +def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_F32 texternalsym:$callee)>; +def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_F64 texternalsym:$callee)>; +def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)), + (CALL_VOID texternalsym:$callee)>; diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td new file mode 100644 index 000000000000..05efe8903413 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -0,0 +1,82 @@ +//===- WebAssemblyInstrControl.td-WebAssembly control-flow ------*- tablegen -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief WebAssembly control-flow code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +let Defs = [ARGUMENTS] in { + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { +// The condition operand is a boolean value which WebAssembly represents as i32. 
+def BR_IF : I<(outs), (ins I32:$cond, bb_op:$dst), + [(brcond I32:$cond, bb:$dst)], + "br_if \t$cond, $dst">; +let isCodeGenOnly = 1 in +def BR_UNLESS : I<(outs), (ins I32:$cond, bb_op:$dst), [], + "br_unless\t$cond, $dst">; +let isBarrier = 1 in { +def BR : I<(outs), (ins bb_op:$dst), + [(br bb:$dst)], + "br \t$dst">; +} // isBarrier = 1 +} // isBranch = 1, isTerminator = 1, hasCtrlDep = 1 + +} // Defs = [ARGUMENTS] + +def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst), + (BR_IF I32:$cond, bb_op:$dst)>; +def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst), + (BR_UNLESS I32:$cond, bb_op:$dst)>; + +let Defs = [ARGUMENTS] in { + +// TODO: SelectionDAG's lowering insists on using a pointer as the index for +// jump tables, so in practice we don't ever use TABLESWITCH_I64 in wasm32 mode +// currently. +let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { +def TABLESWITCH_I32 : I<(outs), (ins I32:$index, bb_op:$default, variable_ops), + [(WebAssemblytableswitch I32:$index, bb:$default)], + "tableswitch\t$index, $default">; +def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops), + [(WebAssemblytableswitch I64:$index, bb:$default)], + "tableswitch\t$index, $default">; +} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 + +// Placemarkers to indicate the start of a block or loop scope. These +// use/clobber EXPR_STACK to prevent them from being moved into the middle of +// an expression tree. +let Uses = [EXPR_STACK], Defs = [EXPR_STACK] in { +def BLOCK : I<(outs), (ins bb_op:$dst), [], "block \t$dst">; +def LOOP : I<(outs), (ins bb_op:$dst), [], "loop \t$dst">; +} // Uses = [EXPR_STACK], Defs = [EXPR_STACK] + +// No-op to indicate to the AsmPrinter that a loop ends here, so a +// basic block label is needed even if it wouldn't otherwise appear so. +let isTerminator = 1, hasCtrlDep = 1 in +def LOOP_END : I<(outs), (ins), []>; + +multiclass RETURN<WebAssemblyRegClass vt> { + def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)], + "return \t$val">; +} + +let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { +let isReturn = 1 in { + defm : RETURN<I32>; + defm : RETURN<I64>; + defm : RETURN<F32>; + defm : RETURN<F64>; + def RETURN_VOID : I<(outs), (ins), [(WebAssemblyreturn)], "return">; +} // isReturn = 1 + def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable">; +} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 + +} // Defs = [ARGUMENTS] diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td index 3fa29061b1de..931f4a913d0f 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td @@ -13,32 +13,99 @@ /// //===----------------------------------------------------------------------===// -/* - * TODO(jfb): Add the following. 
- * - * int32.wrap[int64]: wrap a 64-bit integer to a 32-bit integer - * int32.trunc_signed[float32]: truncate a 32-bit float to a signed 32-bit integer - * int32.trunc_signed[float64]: truncate a 64-bit float to a signed 32-bit integer - * int32.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 32-bit integer - * int32.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 32-bit integer - * int32.reinterpret[float32]: reinterpret the bits of a 32-bit float as a 32-bit integer - * int64.extend_signed[int32]: extend a signed 32-bit integer to a 64-bit integer - * int64.extend_unsigned[int32]: extend an unsigned 32-bit integer to a 64-bit integer - * int64.trunc_signed[float32]: truncate a 32-bit float to a signed 64-bit integer - * int64.trunc_signed[float64]: truncate a 64-bit float to a signed 64-bit integer - * int64.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 64-bit integer - * int64.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 64-bit integer - * int64.reinterpret[float64]: reinterpret the bits of a 64-bit float as a 64-bit integer - * float32.demote[float64]: demote a 64-bit float to a 32-bit float - * float32.cvt_signed[int32]: convert a signed 32-bit integer to a 32-bit float - * float32.cvt_signed[int64]: convert a signed 64-bit integer to a 32-bit float - * float32.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 32-bit float - * float32.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 32-bit float - * float32.reinterpret[int32]: reinterpret the bits of a 32-bit integer as a 32-bit float - * float64.promote[float32]: promote a 32-bit float to a 64-bit float - * float64.cvt_signed[int32]: convert a signed 32-bit integer to a 64-bit float - * float64.cvt_signed[int64]: convert a signed 64-bit integer to a 64-bit float - * float64.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 64-bit float - * float64.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 64-bit float - * float64.reinterpret[int64]: reinterpret the bits of a 64-bit integer as a 64-bit float - */ +let Defs = [ARGUMENTS] in { + +def I32_WRAP_I64 : I<(outs I32:$dst), (ins I64:$src), + [(set I32:$dst, (trunc I64:$src))], + "i32.wrap/i64\t$dst, $src">; + +def I64_EXTEND_S_I32 : I<(outs I64:$dst), (ins I32:$src), + [(set I64:$dst, (sext I32:$src))], + "i64.extend_s/i32\t$dst, $src">; +def I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src), + [(set I64:$dst, (zext I32:$src))], + "i64.extend_u/i32\t$dst, $src">; + +} // defs = [ARGUMENTS] + +// Expand a "don't care" extend into zero-extend (chosen over sign-extend +// somewhat arbitrarily, although it favors popular hardware architectures +// and is conceptually a simpler operation). +def : Pat<(i64 (anyext I32:$src)), (I64_EXTEND_U_I32 I32:$src)>; + +let Defs = [ARGUMENTS] in { + +// Conversion from floating point to integer traps on overflow and invalid. 
+let hasSideEffects = 1 in { +def I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src), + [(set I32:$dst, (fp_to_sint F32:$src))], + "i32.trunc_s/f32\t$dst, $src">; +def I32_TRUNC_U_F32 : I<(outs I32:$dst), (ins F32:$src), + [(set I32:$dst, (fp_to_uint F32:$src))], + "i32.trunc_u/f32\t$dst, $src">; +def I64_TRUNC_S_F32 : I<(outs I64:$dst), (ins F32:$src), + [(set I64:$dst, (fp_to_sint F32:$src))], + "i64.trunc_s/f32\t$dst, $src">; +def I64_TRUNC_U_F32 : I<(outs I64:$dst), (ins F32:$src), + [(set I64:$dst, (fp_to_uint F32:$src))], + "i64.trunc_u/f32\t$dst, $src">; +def I32_TRUNC_S_F64 : I<(outs I32:$dst), (ins F64:$src), + [(set I32:$dst, (fp_to_sint F64:$src))], + "i32.trunc_s/f64\t$dst, $src">; +def I32_TRUNC_U_F64 : I<(outs I32:$dst), (ins F64:$src), + [(set I32:$dst, (fp_to_uint F64:$src))], + "i32.trunc_u/f64\t$dst, $src">; +def I64_TRUNC_S_F64 : I<(outs I64:$dst), (ins F64:$src), + [(set I64:$dst, (fp_to_sint F64:$src))], + "i64.trunc_s/f64\t$dst, $src">; +def I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src), + [(set I64:$dst, (fp_to_uint F64:$src))], + "i64.trunc_u/f64\t$dst, $src">; +} // hasSideEffects = 1 + +def F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src), + [(set F32:$dst, (sint_to_fp I32:$src))], + "f32.convert_s/i32\t$dst, $src">; +def F32_CONVERT_U_I32 : I<(outs F32:$dst), (ins I32:$src), + [(set F32:$dst, (uint_to_fp I32:$src))], + "f32.convert_u/i32\t$dst, $src">; +def F64_CONVERT_S_I32 : I<(outs F64:$dst), (ins I32:$src), + [(set F64:$dst, (sint_to_fp I32:$src))], + "f64.convert_s/i32\t$dst, $src">; +def F64_CONVERT_U_I32 : I<(outs F64:$dst), (ins I32:$src), + [(set F64:$dst, (uint_to_fp I32:$src))], + "f64.convert_u/i32\t$dst, $src">; +def F32_CONVERT_S_I64 : I<(outs F32:$dst), (ins I64:$src), + [(set F32:$dst, (sint_to_fp I64:$src))], + "f32.convert_s/i64\t$dst, $src">; +def F32_CONVERT_U_I64 : I<(outs F32:$dst), (ins I64:$src), + [(set F32:$dst, (uint_to_fp I64:$src))], + "f32.convert_u/i64\t$dst, $src">; +def F64_CONVERT_S_I64 : I<(outs F64:$dst), (ins I64:$src), + [(set F64:$dst, (sint_to_fp I64:$src))], + "f64.convert_s/i64\t$dst, $src">; +def F64_CONVERT_U_I64 : I<(outs F64:$dst), (ins I64:$src), + [(set F64:$dst, (uint_to_fp I64:$src))], + "f64.convert_u/i64\t$dst, $src">; + +def F64_PROMOTE_F32 : I<(outs F64:$dst), (ins F32:$src), + [(set F64:$dst, (fextend F32:$src))], + "f64.promote/f32\t$dst, $src">; +def F32_DEMOTE_F64 : I<(outs F32:$dst), (ins F64:$src), + [(set F32:$dst, (fround F64:$src))], + "f32.demote/f64\t$dst, $src">; + +def I32_REINTERPRET_F32 : I<(outs I32:$dst), (ins F32:$src), + [(set I32:$dst, (bitconvert F32:$src))], + "i32.reinterpret/f32\t$dst, $src">; +def F32_REINTERPRET_I32 : I<(outs F32:$dst), (ins I32:$src), + [(set F32:$dst, (bitconvert I32:$src))], + "f32.reinterpret/i32\t$dst, $src">; +def I64_REINTERPRET_F64 : I<(outs I64:$dst), (ins F64:$src), + [(set I64:$dst, (bitconvert F64:$src))], + "i64.reinterpret/f64\t$dst, $src">; +def F64_REINTERPRET_I64 : I<(outs F64:$dst), (ins I64:$src), + [(set F64:$dst, (bitconvert I64:$src))], + "f64.reinterpret/i64\t$dst, $src">; + +} // Defs = [ARGUMENTS] diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td index 30ef6339d65a..5520c6de6732 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td @@ -12,33 +12,90 @@ /// //===----------------------------------------------------------------------===// -defm FADD : 
BinaryFP<fadd>; -defm FSUB : BinaryFP<fsub>; -defm FMUL : BinaryFP<fmul>; -defm FDIV : BinaryFP<fdiv>; -defm FABS : UnaryFP<fabs>; -defm FNEG : UnaryFP<fneg>; -defm COPYSIGN : BinaryFP<fcopysign>; -defm CEIL : UnaryFP<fceil>; -defm FLOOR : UnaryFP<ffloor>; -defm TRUNC : UnaryFP<ftrunc>; -defm NEARESTINT : UnaryFP<fnearbyint>; - -/* - * TODO(jfb): Add the following for 32-bit and 64-bit. - * - * float32.eq: compare equal - * float32.lt: less than - * float32.le: less than or equal - * float32.gt: greater than - * float32.ge: greater than or equal - */ - -defm SQRT : UnaryFP<fsqrt>; - -/* - * TODO(jfb): Add the following for 32-bit and 64-bit. - * - * float32.min: minimum (binary operator); if either operand is NaN, returns NaN - * float32.max: maximum (binary operator); if either operand is NaN, returns NaN - */ +let Defs = [ARGUMENTS] in { + +let isCommutable = 1 in +defm ADD : BinaryFP<fadd, "add ">; +defm SUB : BinaryFP<fsub, "sub ">; +let isCommutable = 1 in +defm MUL : BinaryFP<fmul, "mul ">; +defm DIV : BinaryFP<fdiv, "div ">; +defm SQRT : UnaryFP<fsqrt, "sqrt">; + +defm ABS : UnaryFP<fabs, "abs ">; +defm NEG : UnaryFP<fneg, "neg ">; +defm COPYSIGN : BinaryFP<fcopysign, "copysign">; + +let isCommutable = 1 in { +defm MIN : BinaryFP<fminnan, "min ">; +defm MAX : BinaryFP<fmaxnan, "max ">; +} // isCommutable = 1 + +defm CEIL : UnaryFP<fceil, "ceil">; +defm FLOOR : UnaryFP<ffloor, "floor">; +defm TRUNC : UnaryFP<ftrunc, "trunc">; +defm NEAREST : UnaryFP<fnearbyint, "nearest">; + +} // Defs = [ARGUMENTS] + +// DAGCombine oddly folds casts into the rhs of copysign. Unfold them. +def : Pat<(fcopysign F64:$lhs, F32:$rhs), + (COPYSIGN_F64 F64:$lhs, (F64_PROMOTE_F32 F32:$rhs))>; +def : Pat<(fcopysign F32:$lhs, F64:$rhs), + (COPYSIGN_F32 F32:$lhs, (F32_DEMOTE_F64 F64:$rhs))>; + +// WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint. +def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>; +def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>; + +let Defs = [ARGUMENTS] in { + +let isCommutable = 1 in { +defm EQ : ComparisonFP<SETOEQ, "eq ">; +defm NE : ComparisonFP<SETUNE, "ne ">; +} // isCommutable = 1 +defm LT : ComparisonFP<SETOLT, "lt ">; +defm LE : ComparisonFP<SETOLE, "le ">; +defm GT : ComparisonFP<SETOGT, "gt ">; +defm GE : ComparisonFP<SETOGE, "ge ">; + +} // Defs = [ARGUMENTS] + +// Don't care floating-point comparisons, supported via other comparisons. 
+def : Pat<(seteq f32:$lhs, f32:$rhs), (EQ_F32 f32:$lhs, f32:$rhs)>; +def : Pat<(setne f32:$lhs, f32:$rhs), (NE_F32 f32:$lhs, f32:$rhs)>; +def : Pat<(setlt f32:$lhs, f32:$rhs), (LT_F32 f32:$lhs, f32:$rhs)>; +def : Pat<(setle f32:$lhs, f32:$rhs), (LE_F32 f32:$lhs, f32:$rhs)>; +def : Pat<(setgt f32:$lhs, f32:$rhs), (GT_F32 f32:$lhs, f32:$rhs)>; +def : Pat<(setge f32:$lhs, f32:$rhs), (GE_F32 f32:$lhs, f32:$rhs)>; +def : Pat<(seteq f64:$lhs, f64:$rhs), (EQ_F64 f64:$lhs, f64:$rhs)>; +def : Pat<(setne f64:$lhs, f64:$rhs), (NE_F64 f64:$lhs, f64:$rhs)>; +def : Pat<(setlt f64:$lhs, f64:$rhs), (LT_F64 f64:$lhs, f64:$rhs)>; +def : Pat<(setle f64:$lhs, f64:$rhs), (LE_F64 f64:$lhs, f64:$rhs)>; +def : Pat<(setgt f64:$lhs, f64:$rhs), (GT_F64 f64:$lhs, f64:$rhs)>; +def : Pat<(setge f64:$lhs, f64:$rhs), (GE_F64 f64:$lhs, f64:$rhs)>; + +let Defs = [ARGUMENTS] in { + +def SELECT_F32 : I<(outs F32:$dst), (ins I32:$cond, F32:$lhs, F32:$rhs), + [(set F32:$dst, (select I32:$cond, F32:$lhs, F32:$rhs))], + "f32.select\t$dst, $cond, $lhs, $rhs">; +def SELECT_F64 : I<(outs F64:$dst), (ins I32:$cond, F64:$lhs, F64:$rhs), + [(set F64:$dst, (select I32:$cond, F64:$lhs, F64:$rhs))], + "f64.select\t$dst, $cond, $lhs, $rhs">; + +} // Defs = [ARGUMENTS] + +// ISD::SELECT requires its operand to conform to getBooleanContents, but +// WebAssembly's select interprets any non-zero value as true, so we can fold +// a setne with 0 into a select. +def : Pat<(select (i32 (setne I32:$cond, 0)), F32:$lhs, F32:$rhs), + (SELECT_F32 I32:$cond, F32:$lhs, F32:$rhs)>; +def : Pat<(select (i32 (setne I32:$cond, 0)), F64:$lhs, F64:$rhs), + (SELECT_F64 I32:$cond, F64:$lhs, F64:$rhs)>; + +// And again, this time with seteq instead of setne and the arms reversed. +def : Pat<(select (i32 (seteq I32:$cond, 0)), F32:$lhs, F32:$rhs), + (SELECT_F32 I32:$cond, F32:$rhs, F32:$lhs)>; +def : Pat<(select (i32 (seteq I32:$cond, 0)), F64:$lhs, F64:$rhs), + (SELECT_F64 I32:$cond, F64:$rhs, F64:$lhs)>; diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td index 513c36fa2ec2..8008dd32353a 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td @@ -1,4 +1,4 @@ -// WebAssemblyInstrFormats.td - WebAssembly Instruction Formats -*- tblgen -*-// +//=- WebAssemblyInstrFormats.td - WebAssembly Instr. Formats -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -12,44 +12,68 @@ /// //===----------------------------------------------------------------------===// -// WebAssembly Instruction Format -class WebAssemblyInst<string cstr> : Instruction { +// WebAssembly Instruction Format. +class WebAssemblyInst<string asmstr> : Instruction { field bits<0> Inst; // Instruction encoding. let Namespace = "WebAssembly"; let Pattern = []; - let Constraints = cstr; + let AsmString = asmstr; } -// Normal instructions -class I<dag oops, dag iops, list<dag> pattern, string cstr = ""> - : WebAssemblyInst<cstr> { +// Normal instructions. +class I<dag oops, dag iops, list<dag> pattern, string asmstr = ""> + : WebAssemblyInst<asmstr> { dag OutOperandList = oops; dag InOperandList = iops; let Pattern = pattern; } // Unary and binary instructions, for the local types that WebAssembly supports. 
-multiclass UnaryInt<SDNode node> { - def _I32 : I<(outs Int32:$dst), (ins Int32:$src), - [(set Int32:$dst, (node Int32:$src))]>; - def _I64 : I<(outs Int64:$dst), (ins Int64:$src), - [(set Int64:$dst, (node Int64:$src))]>; -} -multiclass BinaryInt<SDNode node> { - def _I32 : I<(outs Int32:$dst), (ins Int32:$lhs, Int32:$rhs), - [(set Int32:$dst, (node Int32:$lhs, Int32:$rhs))]>; - def _I64 : I<(outs Int64:$dst), (ins Int64:$lhs, Int64:$rhs), - [(set Int64:$dst, (node Int64:$lhs, Int64:$rhs))]>; -} -multiclass UnaryFP<SDNode node> { - def _F32 : I<(outs Float32:$dst), (ins Float32:$src), - [(set Float32:$dst, (node Float32:$src))]>; - def _F64 : I<(outs Float64:$dst), (ins Float64:$src), - [(set Float64:$dst, (node Float64:$src))]>; -} -multiclass BinaryFP<SDNode node> { - def _F32 : I<(outs Float32:$dst), (ins Float32:$lhs, Float32:$rhs), - [(set Float32:$dst, (node Float32:$lhs, Float32:$rhs))]>; - def _F64 : I<(outs Float64:$dst), (ins Float64:$lhs, Float64:$rhs), - [(set Float64:$dst, (node Float64:$lhs, Float64:$rhs))]>; +multiclass UnaryInt<SDNode node, string name> { + def _I32 : I<(outs I32:$dst), (ins I32:$src), + [(set I32:$dst, (node I32:$src))], + !strconcat("i32.", !strconcat(name, "\t$dst, $src"))>; + def _I64 : I<(outs I64:$dst), (ins I64:$src), + [(set I64:$dst, (node I64:$src))], + !strconcat("i64.", !strconcat(name, "\t$dst, $src"))>; +} +multiclass BinaryInt<SDNode node, string name> { + def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), + [(set I32:$dst, (node I32:$lhs, I32:$rhs))], + !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs), + [(set I64:$dst, (node I64:$lhs, I64:$rhs))], + !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; +} +multiclass UnaryFP<SDNode node, string name> { + def _F32 : I<(outs F32:$dst), (ins F32:$src), + [(set F32:$dst, (node F32:$src))], + !strconcat("f32.", !strconcat(name, "\t$dst, $src"))>; + def _F64 : I<(outs F64:$dst), (ins F64:$src), + [(set F64:$dst, (node F64:$src))], + !strconcat("f64.", !strconcat(name, "\t$dst, $src"))>; +} +multiclass BinaryFP<SDNode node, string name> { + def _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs), + [(set F32:$dst, (node F32:$lhs, F32:$rhs))], + !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs), + [(set F64:$dst, (node F64:$lhs, F64:$rhs))], + !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; +} +multiclass ComparisonInt<CondCode cond, string name> { + def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), + [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))], + !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs), + [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))], + !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; +} +multiclass ComparisonFP<CondCode cond, string name> { + def _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs), + [(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))], + !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs), + [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))], + !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index ea8937c8f9f2..5e7663cdb506 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ 
b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -24,5 +24,136 @@ using namespace llvm; #define DEBUG_TYPE "wasm-instr-info" +#define GET_INSTRINFO_CTOR_DTOR +#include "WebAssemblyGenInstrInfo.inc" + WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) - : RI(STI.getTargetTriple()) {} + : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN, + WebAssembly::ADJCALLSTACKUP), + RI(STI.getTargetTriple()) {} + +void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + DebugLoc DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { + // This method is called by post-RA expansion, which expects only pregs to + // exist. However we need to handle both here. + auto &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = TargetRegisterInfo::isVirtualRegister(DestReg) ? + MRI.getRegClass(DestReg) : + MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(SrcReg); + + unsigned CopyLocalOpcode; + if (RC == &WebAssembly::I32RegClass) + CopyLocalOpcode = WebAssembly::COPY_LOCAL_I32; + else if (RC == &WebAssembly::I64RegClass) + CopyLocalOpcode = WebAssembly::COPY_LOCAL_I64; + else if (RC == &WebAssembly::F32RegClass) + CopyLocalOpcode = WebAssembly::COPY_LOCAL_F32; + else if (RC == &WebAssembly::F64RegClass) + CopyLocalOpcode = WebAssembly::COPY_LOCAL_F64; + else + llvm_unreachable("Unexpected register class"); + + BuildMI(MBB, I, DL, get(CopyLocalOpcode), DestReg) + .addReg(SrcReg, KillSrc ? RegState::Kill : 0); +} + +// Branch analysis. +bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool /*AllowModify*/) const { + bool HaveCond = false; + for (MachineInstr &MI : MBB.terminators()) { + switch (MI.getOpcode()) { + default: + // Unhandled instruction; bail out. + return true; + case WebAssembly::BR_IF: + if (HaveCond) + return true; + Cond.push_back(MachineOperand::CreateImm(true)); + Cond.push_back(MI.getOperand(0)); + TBB = MI.getOperand(1).getMBB(); + HaveCond = true; + break; + case WebAssembly::BR_UNLESS: + if (HaveCond) + return true; + Cond.push_back(MachineOperand::CreateImm(false)); + Cond.push_back(MI.getOperand(0)); + TBB = MI.getOperand(1).getMBB(); + HaveCond = true; + break; + case WebAssembly::BR: + if (!HaveCond) + TBB = MI.getOperand(0).getMBB(); + else + FBB = MI.getOperand(0).getMBB(); + break; + } + if (MI.isBarrier()) + break; + } + + return false; +} + +unsigned WebAssemblyInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::instr_iterator I = MBB.instr_end(); + unsigned Count = 0; + + while (I != MBB.instr_begin()) { + --I; + if (I->isDebugValue()) + continue; + if (!I->isTerminator()) + break; + // Remove the branch. 
+ I->eraseFromParent(); + I = MBB.instr_end(); + ++Count; + } + + return Count; +} + +unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, + DebugLoc DL) const { + if (Cond.empty()) { + if (!TBB) + return 0; + + BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(TBB); + return 1; + } + + assert(Cond.size() == 2 && "Expected a flag and a successor block"); + + if (Cond[0].getImm()) { + BuildMI(&MBB, DL, get(WebAssembly::BR_IF)) + .addOperand(Cond[1]) + .addMBB(TBB); + } else { + BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)) + .addOperand(Cond[1]) + .addMBB(TBB); + } + if (!FBB) + return 1; + + BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(FBB); + return 2; +} + +bool WebAssemblyInstrInfo::ReverseBranchCondition( + SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 2 && "Expected a flag and a successor block"); + Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm()); + return false; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index 1c4ae22f16d6..5ddd9b36f243 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -19,17 +19,35 @@ #include "WebAssemblyRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#define GET_INSTRINFO_HEADER +#include "WebAssemblyGenInstrInfo.inc" + namespace llvm { class WebAssemblySubtarget; -class WebAssemblyInstrInfo final { +class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo { const WebAssemblyRegisterInfo RI; public: explicit WebAssemblyInstrInfo(const WebAssemblySubtarget &STI); const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; } + + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc DL, unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify = false) const override; + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, + DebugLoc DL) const override; + bool + ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index fe3ca76dc08a..f0b4ce7caf51 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -25,20 +25,48 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">, // WebAssembly-specific DAG Node Types. 
//===----------------------------------------------------------------------===// +def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>]>; +def SDT_WebAssemblyCallSeqEnd : + SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; +def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>; +def SDT_WebAssemblyTableswitch : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>; +def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>; +def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; + //===----------------------------------------------------------------------===// // WebAssembly-specific DAG Nodes. //===----------------------------------------------------------------------===// +def WebAssemblycallseq_start : + SDNode<"ISD::CALLSEQ_START", SDT_WebAssemblyCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def WebAssemblycallseq_end : + SDNode<"ISD::CALLSEQ_END", SDT_WebAssemblyCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def WebAssemblycall0 : SDNode<"WebAssemblyISD::CALL0", + SDT_WebAssemblyCall0, + [SDNPHasChain, SDNPVariadic]>; +def WebAssemblycall1 : SDNode<"WebAssemblyISD::CALL1", + SDT_WebAssemblyCall1, + [SDNPHasChain, SDNPVariadic]>; +def WebAssemblytableswitch : SDNode<"WebAssemblyISD::TABLESWITCH", + SDT_WebAssemblyTableswitch, + [SDNPHasChain, SDNPVariadic]>; +def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT", + SDT_WebAssemblyArgument>; +def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN", + SDT_WebAssemblyReturn, [SDNPHasChain]>; +def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper", + SDT_WebAssemblyWrapper>; + //===----------------------------------------------------------------------===// // WebAssembly-specific Operands. //===----------------------------------------------------------------------===// -/* - * TODO(jfb): Add the following. - * - * get_local: read the current value of a local variable - * set_local: set the current value of a local variable -*/ +def bb_op : Operand<OtherVT>; //===----------------------------------------------------------------------===// // WebAssembly Instruction Format Definitions. @@ -47,13 +75,86 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">, include "WebAssemblyInstrFormats.td" //===----------------------------------------------------------------------===// +// Additional instructions. +//===----------------------------------------------------------------------===// + +multiclass ARGUMENT<WebAssemblyRegClass vt> { + let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in + def ARGUMENT_#vt : I<(outs vt:$res), (ins i32imm:$argno), + [(set vt:$res, (WebAssemblyargument timm:$argno))]>; +} +defm : ARGUMENT<I32>; +defm : ARGUMENT<I64>; +defm : ARGUMENT<F32>; +defm : ARGUMENT<F64>; + +let Defs = [ARGUMENTS] in { + +// get_local and set_local are not generated by instruction selection; they +// are implied by virtual register uses and defs in most contexts. However, +// they are explicitly emitted for special purposes. 
+multiclass LOCAL<WebAssemblyRegClass vt> { + def GET_LOCAL_#vt : I<(outs vt:$res), (ins i32imm:$regno), [], + "get_local\t$res, $regno">; + // TODO: set_local returns its operand value + def SET_LOCAL_#vt : I<(outs), (ins i32imm:$regno, vt:$src), [], + "set_local\t$regno, $src">; + + // COPY_LOCAL is not an actual instruction in wasm, but since we allow + // get_local and set_local to be implicit, we can have a COPY_LOCAL which + // is actually a no-op because all the work is done in the implied + // get_local and set_local. + let isAsCheapAsAMove = 1 in + def COPY_LOCAL_#vt : I<(outs vt:$res), (ins vt:$src), [], + "copy_local\t$res, $src">; +} +defm : LOCAL<I32>; +defm : LOCAL<I64>; +defm : LOCAL<F32>; +defm : LOCAL<F64>; + +let isMoveImm = 1 in { +def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm), + [(set I32:$res, imm:$imm)], + "i32.const\t$res, $imm">; +def CONST_I64 : I<(outs I64:$res), (ins i64imm:$imm), + [(set I64:$res, imm:$imm)], + "i64.const\t$res, $imm">; +def CONST_F32 : I<(outs F32:$res), (ins f32imm:$imm), + [(set F32:$res, fpimm:$imm)], + "f32.const\t$res, $imm">; +def CONST_F64 : I<(outs F64:$res), (ins f64imm:$imm), + [(set F64:$res, fpimm:$imm)], + "f64.const\t$res, $imm">; +} // isMoveImm = 1 + +} // Defs = [ARGUMENTS] + +def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$dst)), + (CONST_I32 tglobaladdr:$dst)>; +def : Pat<(i32 (WebAssemblywrapper texternalsym:$dst)), + (CONST_I32 texternalsym:$dst)>; +def : Pat<(i32 (WebAssemblywrapper tjumptable:$dst)), + (CONST_I32 tjumptable:$dst)>; + +let Defs = [ARGUMENTS] in { + +// Function signature and local variable declaration "instructions". +def PARAM : I<(outs), (ins variable_ops), [], ".param \t">; +def RESULT : I<(outs), (ins variable_ops), [], ".result \t">; +def LOCAL : I<(outs), (ins variable_ops), [], ".local \t">; + +} // Defs = [ARGUMENTS] + +//===----------------------------------------------------------------------===// // Additional sets of instructions. //===----------------------------------------------------------------------===// include "WebAssemblyInstrMemory.td" include "WebAssemblyInstrCall.td" +include "WebAssemblyInstrControl.td" include "WebAssemblyInstrInteger.td" -include "WebAssemblyInstrFloat.td" include "WebAssemblyInstrConv.td" +include "WebAssemblyInstrFloat.td" include "WebAssemblyInstrAtomics.td" include "WebAssemblyInstrSIMD.td" diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td index 5f60fe81b1a2..09e5eafb85e9 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td @@ -12,34 +12,77 @@ /// //===----------------------------------------------------------------------===// -defm ADD : BinaryInt<add>; -defm SUB : BinaryInt<sub>; -defm MUL : BinaryInt<mul>; -defm SDIV : BinaryInt<sdiv>; -defm UDIV : BinaryInt<udiv>; -defm SREM : BinaryInt<srem>; -defm UREM : BinaryInt<urem>; -defm AND : BinaryInt<and>; -defm IOR : BinaryInt<or>; -defm XOR : BinaryInt<xor>; -defm SHL : BinaryInt<shl>; -defm SHR : BinaryInt<srl>; -defm SAR : BinaryInt<sra>; - -/* - * TODO(jfb): Add the following for 32-bit and 64-bit. 
- * - * int32.eq: signed-less compare equal - * int32.slt: signed less than - * int32.sle: signed less than or equal - * int32.ult: unsigned less than - * int32.ule: unsigned less than or equal - * int32.sgt: signed greater than - * int32.sge: signed greater than or equal - * int32.ugt: unsigned greater than - * int32.uge: unsigned greater than or equal - */ - -defm CLZ : UnaryInt<ctlz>; -defm CTZ : UnaryInt<cttz>; -defm POPCNT : UnaryInt<ctpop>; +let Defs = [ARGUMENTS] in { + +// The spaces after the names are for aesthetic purposes only, to make +// operands line up vertically after tab expansion. +let isCommutable = 1 in +defm ADD : BinaryInt<add, "add ">; +defm SUB : BinaryInt<sub, "sub ">; +let isCommutable = 1 in +defm MUL : BinaryInt<mul, "mul ">; +// Divide and remainder trap on a zero denominator. +let hasSideEffects = 1 in { +defm DIV_S : BinaryInt<sdiv, "div_s">; +defm DIV_U : BinaryInt<udiv, "div_u">; +defm REM_S : BinaryInt<srem, "rem_s">; +defm REM_U : BinaryInt<urem, "rem_u">; +} // hasSideEffects = 1 +let isCommutable = 1 in { +defm AND : BinaryInt<and, "and ">; +defm OR : BinaryInt<or, "or ">; +defm XOR : BinaryInt<xor, "xor ">; +} // isCommutable = 1 +defm SHL : BinaryInt<shl, "shl ">; +defm SHR_U : BinaryInt<srl, "shr_u">; +defm SHR_S : BinaryInt<sra, "shr_s">; + +let isCommutable = 1 in { +defm EQ : ComparisonInt<SETEQ, "eq ">; +defm NE : ComparisonInt<SETNE, "ne ">; +} // isCommutable = 1 +defm LT_S : ComparisonInt<SETLT, "lt_s">; +defm LE_S : ComparisonInt<SETLE, "le_s">; +defm LT_U : ComparisonInt<SETULT, "lt_u">; +defm LE_U : ComparisonInt<SETULE, "le_u">; +defm GT_S : ComparisonInt<SETGT, "gt_s">; +defm GE_S : ComparisonInt<SETGE, "ge_s">; +defm GT_U : ComparisonInt<SETUGT, "gt_u">; +defm GE_U : ComparisonInt<SETUGE, "ge_u">; + +defm CLZ : UnaryInt<ctlz, "clz ">; +defm CTZ : UnaryInt<cttz, "ctz ">; +defm POPCNT : UnaryInt<ctpop, "popcnt">; + +} // Defs = [ARGUMENTS] + +// Expand the "don't care" operations to supported operations. +def : Pat<(ctlz_zero_undef I32:$src), (CLZ_I32 I32:$src)>; +def : Pat<(ctlz_zero_undef I64:$src), (CLZ_I64 I64:$src)>; +def : Pat<(cttz_zero_undef I32:$src), (CTZ_I32 I32:$src)>; +def : Pat<(cttz_zero_undef I64:$src), (CTZ_I64 I64:$src)>; + +let Defs = [ARGUMENTS] in { + +def SELECT_I32 : I<(outs I32:$dst), (ins I32:$cond, I32:$lhs, I32:$rhs), + [(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))], + "i32.select\t$dst, $cond, $lhs, $rhs">; +def SELECT_I64 : I<(outs I64:$dst), (ins I32:$cond, I64:$lhs, I64:$rhs), + [(set I64:$dst, (select I32:$cond, I64:$lhs, I64:$rhs))], + "i64.select\t$dst, $cond, $lhs, $rhs">; + +} // Defs = [ARGUMENTS] + +// ISD::SELECT requires its operand to conform to getBooleanContents, but +// WebAssembly's select interprets any non-zero value as true, so we can fold +// a setne with 0 into a select. +def : Pat<(select (i32 (setne I32:$cond, 0)), I32:$lhs, I32:$rhs), + (SELECT_I32 I32:$cond, I32:$lhs, I32:$rhs)>; +def : Pat<(select (i32 (setne I32:$cond, 0)), I64:$lhs, I64:$rhs), + (SELECT_I64 I32:$cond, I64:$lhs, I64:$rhs)>; + +// And again, this time with seteq instead of setne and the arms reversed. 
+def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs), + (SELECT_I32 I32:$cond, I32:$rhs, I32:$lhs)>; +def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs), + (SELECT_I64 I32:$cond, I64:$rhs, I64:$lhs)>; diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 5ab40e826caa..74ec45d58644 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -12,35 +12,500 @@ /// //===----------------------------------------------------------------------===// -/* - * TODO(jfb): Add the following. - * Each has optional alignment and immediate byte offset. - * - * int32.load_sx[int8]: sign-extend to int32 - * int32.load_sx[int16]: sign-extend to int32 - * int32.load_zx[int8]: zero-extend to int32 - * int32.load_zx[int16]: zero-extend to int32 - * int32.load[int32]: (no conversion) - * int64.load_sx[int8]: sign-extend to int64 - * int64.load_sx[int16]: sign-extend to int64 - * int64.load_sx[int32]: sign-extend to int64 - * int64.load_zx[int8]: zero-extend to int64 - * int64.load_zx[int16]: zero-extend to int64 - * int64.load_zx[int32]: zero-extend to int64 - * int64.load[int64]: (no conversion) - * float32.load[float32]: (no conversion) - * float64.load[float64]: (no conversion) - * - * int32.store[int8]: wrap int32 to int8 - * int32.store[int16]: wrap int32 to int16 - * int32.store[int32]: (no conversion) - * int64.store[int8]: wrap int64 to int8 - * int64.store[int16]: wrap int64 to int16 - * int64.store[int32]: wrap int64 to int32 - * int64.store[int64]: (no conversion) - * float32.store[float32]: (no conversion) - * float64.store[float64]: (no conversion) - * - * load_global: load the value of a given global variable - * store_global: store a given value to a given global variable - */ +// TODO: +// - HasAddr64 +// - WebAssemblyTargetLowering having to do with atomics +// - Each has optional alignment. + +// WebAssembly has i8/i16/i32/i64/f32/f64 memory types, but doesn't have i8/i16 +// local types. These memory-only types instead zero- or sign-extend into local +// types when loading, and truncate when storing. + +// WebAssembly constant offsets are performed as unsigned with infinite +// precision, so we need to check for NoUnsignedWrap so that we don't fold an +// offset for an add that needs wrapping. +def regPlusImm : PatFrag<(ops node:$off, node:$addr), + (add node:$addr, node:$off), + [{ return N->getFlags()->hasNoUnsignedWrap(); }]>; + +let Defs = [ARGUMENTS] in { + +// Basic load. +def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load\t$dst, ${off}(${addr})">; +def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load\t$dst, ${off}(${addr})">; +def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr), [], + "f32.load\t$dst, ${off}(${addr})">; +def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr), [], + "f64.load\t$dst, ${off}(${addr})">; + +} // Defs = [ARGUMENTS] + +// Select loads with no constant offset. +def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>; +def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>; +def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; +def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; + +// Select loads with a constant offset. 
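The constant-offset folds below go through the regPlusImm fragment defined above, which only matches when the add carries the no-unsigned-wrap flag. A minimal standalone sketch, an editorial aside rather than part of this commit, of why a wrapping add must not be folded into a non-wrapping base-plus-offset address:

// If the i32 add wraps, its result differs from the address WebAssembly would
// compute by adding the folded offset without wrapping, so the fold would
// change which address is accessed. Base and Off are hypothetical values.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Base = 0xFFFFFFF0u; // hypothetical base address
  uint32_t Off = 0x20u;        // constant offset from the IR-level add

  uint32_t WrappingAdd = Base + Off;           // i32 add semantics: 0x00000010
  uint64_t FoldedAddr = uint64_t(Base) + Off;  // non-wrapping base+offset: 0x100000010

  std::printf("i32 add result:        0x%08x\n", WrappingAdd);
  std::printf("folded effective addr: 0x%09llx\n",
              (unsigned long long)FoldedAddr);
  return 0;
}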
+def : Pat<(i32 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_I32 imm:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_I64 imm:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_F32 imm:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_F64 imm:$off, $addr)>; +def : Pat<(i32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_I64 tglobaladdr:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_F32 tglobaladdr:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_F64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_I64 texternalsym:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_F32 texternalsym:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_F64 texternalsym:$off, $addr)>; + +// Select loads with just a constant offset. +def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0))>; +def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_F32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_F64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_F32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_F64 texternalsym:$off, (CONST_I32 0))>; + +let Defs = [ARGUMENTS] in { + +// Extending load. 
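+// These correspond to i32.load8_s/load8_u/load16_s/load16_u and the i64
+// variants up through load32_s/load32_u: the narrow value in memory is sign-
+// or zero-extended to the full register type as part of the load.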
+def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load8_s\t$dst, ${off}(${addr})">; +def LOAD8_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load8_u\t$dst, ${off}(${addr})">; +def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load16_s\t$dst, ${off}(${addr})">; +def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load16_u\t$dst, ${off}(${addr})">; +def LOAD8_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load8_s\t$dst, ${off}(${addr})">; +def LOAD8_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load8_u\t$dst, ${off}(${addr})">; +def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load16_s\t$dst, ${off}(${addr})">; +def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load16_u\t$dst, ${off}(${addr})">; +def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load32_s\t$dst, ${off}(${addr})">; +def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load32_u\t$dst, ${off}(${addr})">; + +} // Defs = [ARGUMENTS] + +// Select extending loads with no constant offset. +def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr)>; +def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; +def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr)>; +def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; +def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; +def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; +def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; + +// Select extending loads with a constant offset. 
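+// As with the basic loads above, a non-wrapping add of an immediate, global
+// address, or external symbol folds into the load's offset operand.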
+def : Pat<(i32 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_S_I32 imm:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I32 imm:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_S_I32 imm:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I32 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_U_I64 imm:$off, $addr)>; +def : Pat<(i32 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_S_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_S_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_S_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_S_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_U_I64 texternalsym:$off, $addr)>; + +// Select extending loads with just a constant offset. 
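+// With no base register available, these use (CONST_I32 0) as the base and
+// carry the entire address in the offset field.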
+def : Pat<(i32 (sextloadi8 imm:$off)), (LOAD8_S_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 imm:$off)), (LOAD16_S_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 imm:$off)), (LOAD8_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 imm:$off)), (LOAD16_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 imm:$off)), (LOAD32_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))), + (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))), + (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>; + +// Resolve "don't care" extending loads to zero-extending loads. This is +// somewhat arbitrary, but zero-extending is conceptually simpler. + +// Select "don't care" extending loads with no constant offset. 
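+// Any of the _s/_u forms would satisfy extload, since it leaves the high
+// bits unspecified; the _u forms are used for all of them.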
+def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; +def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; +def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; +def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; +def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; + +// Select "don't care" extending loads with a constant offset. +def : Pat<(i32 (extloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I32 imm:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I32 imm:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_U_I64 imm:$off, $addr)>; +def : Pat<(i32 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_U_I64 texternalsym:$off, $addr)>; + +// Select "don't care" extending loads with just a constant offset. 
+def : Pat<(i32 (extloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>;
+
+let Defs = [ARGUMENTS] in {
+
+// Basic store.
+// Note that we split the patterns out of the instruction definitions because
+// WebAssembly's stores return their operand value, and tablegen doesn't like
+// instruction definition patterns that don't reference all of the output
+// operands.
+// Note: WebAssembly inverts SelectionDAG's usual operand order.
+def STORE_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [],
+                  "i32.store\t$dst, ${off}(${addr}), $val">;
+def STORE_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
+                  "i64.store\t$dst, ${off}(${addr}), $val">;
+def STORE_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, F32:$val), [],
+                  "f32.store\t$dst, ${off}(${addr}), $val">;
+def STORE_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, F64:$val), [],
+                  "f64.store\t$dst, ${off}(${addr}), $val">;
+
+} // Defs = [ARGUMENTS]
+
+// Select stores with no constant offset.
+def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>;
+
+// Select stores with a constant offset.
+def : Pat<(store I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_F32 imm:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_F64 imm:$off, I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>; + +// Select stores with just a constant offset. +def : Pat<(store I32:$val, imm:$off), + (STORE_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, imm:$off), + (STORE_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, imm:$off), + (STORE_F32 imm:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, imm:$off), + (STORE_F64 imm:$off, (CONST_I32 0), F64:$val)>; +def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_F32 tglobaladdr:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_F64 tglobaladdr:$off, (CONST_I32 0), F64:$val)>; +def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_F32 texternalsym:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_F64 texternalsym:$off, (CONST_I32 0), F64:$val)>; + +let Defs = [ARGUMENTS] in { + +// Truncating store. 
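+// These are i32.store8/store16 and i64.store8/store16/store32: the register
+// value is wrapped to the narrower memory type as part of the store.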
+def STORE8_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [], + "i32.store8\t$dst, ${off}(${addr}), $val">; +def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [], + "i32.store16\t$dst, ${off}(${addr}), $val">; +def STORE8_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], + "i64.store8\t$dst, ${off}(${addr}), $val">; +def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], + "i64.store16\t$dst, ${off}(${addr}), $val">; +def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], + "i64.store32\t$dst, ${off}(${addr}), $val">; + +} // Defs = [ARGUMENTS] + +// Select truncating stores with no constant offset. +def : Pat<(truncstorei8 I32:$val, I32:$addr), + (STORE8_I32 0, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, I32:$addr), + (STORE16_I32 0, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, I32:$addr), + (STORE8_I64 0, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, I32:$addr), + (STORE16_I64 0, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, I32:$addr), + (STORE32_I64 0, I32:$addr, I64:$val)>; + +// Select truncating stores with a constant offset. +def : Pat<(truncstorei8 I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE8_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE16_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE8_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE16_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE32_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>; + +// Select truncating stores with just a constant offset. 
+def : Pat<(truncstorei8 I32:$val, imm:$off), + (STORE8_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, imm:$off), + (STORE16_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, imm:$off), + (STORE8_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, imm:$off), + (STORE16_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, imm:$off), + (STORE32_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE8_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE16_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE8_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE16_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE32_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + +let Defs = [ARGUMENTS] in { + +// Memory size. +def MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins), + [(set I32:$dst, (int_wasm_memory_size))], + "memory_size\t$dst">, + Requires<[HasAddr32]>; +def MEMORY_SIZE_I64 : I<(outs I64:$dst), (ins), + [(set I64:$dst, (int_wasm_memory_size))], + "memory_size\t$dst">, + Requires<[HasAddr64]>; + +// Grow memory. +def GROW_MEMORY_I32 : I<(outs), (ins I32:$delta), + [(int_wasm_grow_memory I32:$delta)], + "grow_memory\t$delta">, + Requires<[HasAddr32]>; +def GROW_MEMORY_I64 : I<(outs), (ins I64:$delta), + [(int_wasm_grow_memory I64:$delta)], + "grow_memory\t$delta">, + Requires<[HasAddr64]>; + +} // Defs = [ARGUMENTS] diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp new file mode 100644 index 000000000000..b009a4e054cc --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -0,0 +1,133 @@ +//===-- WebAssemblyLowerBrUnless.cpp - Lower br_unless --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file lowers br_unless into br_if with an inverted condition. +/// +/// br_unless is not currently in the spec, but it's very convenient for LLVM +/// to use. This pass allows LLVM to use it, for now. 
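+///
+/// The lowering itself is straightforward: when the branch condition is a
+/// stackified comparison with a single def, that comparison is rewritten to
+/// its inverse; otherwise an explicit equality test against zero is inserted
+/// to invert the condition, and the br_unless is then replaced by a br_if.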
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-lower-br_unless" + +namespace { +class WebAssemblyLowerBrUnless final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly Lower br_unless"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyLowerBrUnless() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyLowerBrUnless::ID = 0; +FunctionPass *llvm::createWebAssemblyLowerBrUnless() { + return new WebAssemblyLowerBrUnless(); +} + +bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** Lowering br_unless **********\n" + "********** Function: " + << MF.getName() << '\n'); + + auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + + for (auto &MBB : MF) { + for (auto MII = MBB.begin(); MII != MBB.end(); ) { + MachineInstr *MI = &*MII++; + if (MI->getOpcode() != WebAssembly::BR_UNLESS) + continue; + + unsigned Cond = MI->getOperand(0).getReg(); + bool Inverted = false; + + // Attempt to invert the condition in place. 
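+      // In-place inversion is only safe when the condition is stackified,
+      // which guarantees that this br_unless is its sole user; otherwise
+      // rewriting the defining comparison would change other uses as well.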
+ if (MFI.isVRegStackified(Cond)) { + assert(MRI.hasOneDef(Cond)); + MachineInstr *Def = MRI.getVRegDef(Cond); + switch (Def->getOpcode()) { + using namespace WebAssembly; + case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break; + case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break; + case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break; + case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break; + case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break; + case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break; + case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break; + case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break; + case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break; + case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break; + case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break; + case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break; + case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break; + case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break; + case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break; + case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break; + case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break; + case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break; + case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break; + case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break; + case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break; + case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break; + case EQ_F64: Def->setDesc(TII.get(NE_F64)); Inverted = true; break; + case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break; + default: break; + } + } + + // If we weren't able to invert the condition in place. Insert an + // expression to invert it. + if (!Inverted) { + unsigned ZeroReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + MFI.stackifyVReg(ZeroReg); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::CONST_I32), ZeroReg) + .addImm(0); + unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + MFI.stackifyVReg(Tmp); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQ_I32), Tmp) + .addReg(Cond) + .addReg(ZeroReg); + Cond = Tmp; + Inverted = true; + } + + // The br_unless condition has now been inverted. Insert a br_if and + // delete the br_unless. + assert(Inverted); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF)) + .addReg(Cond) + .addOperand(MI->getOperand(1)); + MBB.erase(MI); + } + } + + return true; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp new file mode 100644 index 000000000000..a953f8247006 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -0,0 +1,106 @@ +// WebAssemblyMCInstLower.cpp - Convert WebAssembly MachineInstr to an MCInst // +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains code to lower WebAssembly MachineInstrs to their +/// corresponding MCInst records. 
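+/// Register operands are mapped through WebAssemblyFunctionInfo's
+/// vreg-to-WebAssembly-register numbering, symbol operands become
+/// MCSymbolRefExpr expressions (wrapped in an add when they carry an offset),
+/// and basic-block operands become references to their MC labels.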
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyMCInstLower.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Constants.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +MCSymbol * +WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { + return Printer.getSymbol(MO.getGlobal()); +} + +MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( + const MachineOperand &MO) const { + return Printer.GetExternalSymbolSymbol(MO.getSymbolName()); +} + +MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MCSymbol *Sym) const { + assert(MO.getTargetFlags() == 0 && "WebAssembly does not use target flags"); + + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); + + int64_t Offset = MO.getOffset(); + if (Offset != 0) { + assert(!MO.isJTI() && "Unexpected offset with jump table index"); + Expr = + MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx); + } + + return MCOperand::createExpr(Expr); +} + +void WebAssemblyMCInstLower::Lower(const MachineInstr *MI, + MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + switch (MO.getType()) { + default: + MI->dump(); + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: { + // Ignore all implicit register operands. + if (MO.isImplicit()) + continue; + const WebAssemblyFunctionInfo &MFI = + *MI->getParent()->getParent()->getInfo<WebAssemblyFunctionInfo>(); + unsigned WAReg = MFI.getWAReg(MO.getReg()); + MCOp = MCOperand::createReg(WAReg); + break; + } + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + case MachineOperand::MO_FPImmediate: { + // TODO: MC converts all floating point immediate operands to double. + // This is fine for numeric values, but may cause NaNs to change bits. + const ConstantFP *Imm = MO.getFPImm(); + if (Imm->getType()->isFloatTy()) + MCOp = MCOperand::createFPImm(Imm->getValueAPF().convertToFloat()); + else if (Imm->getType()->isDoubleTy()) + MCOp = MCOperand::createFPImm(Imm->getValueAPF().convertToDouble()); + else + llvm_unreachable("unknown floating point immediate type"); + break; + } + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::createExpr( + MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + break; + } + + OutMI.addOperand(MCOp); + } +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h new file mode 100644 index 000000000000..6d704704f576 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h @@ -0,0 +1,45 @@ +//===-- WebAssemblyMCInstLower.h - Lower MachineInstr to MCInst -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares the class to lower WebAssembly MachineInstrs to +/// their corresponding MCInst records. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H + +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class AsmPrinter; +class MCContext; +class MCSymbol; +class MachineInstr; +class MachineOperand; + +/// This class is used to lower an MachineInstr into an MCInst. +class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower { + MCContext &Ctx; + AsmPrinter &Printer; + + MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; + MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; + MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; + +public: + WebAssemblyMCInstLower(MCContext &ctx, AsmPrinter &printer) + : Ctx(ctx), Printer(printer) {} + void Lower(const MachineInstr *MI, MCInst &OutMI) const; +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp index 542d984b9006..225c5d32cb5d 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp @@ -17,3 +17,9 @@ using namespace llvm; WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() {} + +void WebAssemblyFunctionInfo::initWARegs() { + assert(WARegs.empty()); + unsigned Reg = UnusedReg; + WARegs.resize(MF.getRegInfo().getNumVirtRegs(), Reg); +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index fc5e910b09ef..6a60280900a9 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -1,4 +1,4 @@ -// WebAssemblyMachineFuctionInfo.h-WebAssembly machine function info -*- C++ -*- +// WebAssemblyMachineFunctionInfo.h-WebAssembly machine function info-*- C++ -*- // // The LLVM Compiler Infrastructure // @@ -16,8 +16,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H -#include "WebAssemblyRegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { @@ -27,9 +26,70 @@ namespace llvm { class WebAssemblyFunctionInfo final : public MachineFunctionInfo { MachineFunction &MF; + std::vector<MVT> Params; + + /// A mapping from CodeGen vreg index to WebAssembly register number. + std::vector<unsigned> WARegs; + + /// A mapping from CodeGen vreg index to a boolean value indicating whether + /// the given register is considered to be "stackified", meaning it has been + /// determined or made to meet the stack requirements: + /// - single use (per path) + /// - single def (per path) + /// - defined and used in LIFO order with other stack registers + BitVector VRegStackified; + + // One entry for each possible target reg. we expect it to be small. 
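+  // It is indexed directly by target physical register number; virtual
+  // registers are mapped through WARegs instead.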
+  std::vector<unsigned> PhysRegs;
+
 public:
-  explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
+  explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {
+    PhysRegs.resize(WebAssembly::NUM_TARGET_REGS, -1U);
+  }
   ~WebAssemblyFunctionInfo() override;
+
+  void addParam(MVT VT) { Params.push_back(VT); }
+  const std::vector<MVT> &getParams() const { return Params; }
+
+  static const unsigned UnusedReg = -1u;
+
+  void stackifyVReg(unsigned VReg) {
+    if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
+      VRegStackified.resize(TargetRegisterInfo::virtReg2Index(VReg) + 1);
+    VRegStackified.set(TargetRegisterInfo::virtReg2Index(VReg));
+  }
+  bool isVRegStackified(unsigned VReg) const {
+    if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
+      return false;
+    return VRegStackified.test(TargetRegisterInfo::virtReg2Index(VReg));
+  }
+
+  void initWARegs();
+  void setWAReg(unsigned VReg, unsigned WAReg) {
+    assert(WAReg != UnusedReg);
+    assert(TargetRegisterInfo::virtReg2Index(VReg) < WARegs.size());
+    WARegs[TargetRegisterInfo::virtReg2Index(VReg)] = WAReg;
+  }
+  unsigned getWAReg(unsigned Reg) const {
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size());
+      return WARegs[TargetRegisterInfo::virtReg2Index(Reg)];
+    }
+    return PhysRegs[Reg];
+  }
+  // If new virtual registers are created after initWARegs has been called,
+  // this function can be used to add WebAssembly register mappings for them.
+  void addWAReg(unsigned VReg, unsigned WAReg) {
+    assert(TargetRegisterInfo::virtReg2Index(VReg) == WARegs.size());
+    WARegs.push_back(WAReg);
+  }
+
+  void addPReg(unsigned PReg, unsigned WAReg) {
+    assert(PReg < WebAssembly::NUM_TARGET_REGS);
+    assert(WAReg < -1U);
+    PhysRegs[PReg] = WAReg;
+  }
+  const std::vector<unsigned> &getPhysRegs() const { return PhysRegs; }
 };
 
 } // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
new file mode 100644
index 000000000000..4dc401a2c7cc
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -0,0 +1,76 @@
+//===-- WebAssemblyOptimizeReturned.cpp - Optimize "returned" attributes --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Optimize calls with "returned" attributes for WebAssembly.
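+///
+/// When a call argument carries the "returned" attribute, the call is known
+/// to return that same value, so uses of the argument that are dominated by
+/// the call can be rewritten to use the call's result instead.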
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-optimize-returned" + +namespace { +class OptimizeReturned final : public FunctionPass, + public InstVisitor<OptimizeReturned> { + const char *getPassName() const override { + return "WebAssembly Optimize Returned"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + FunctionPass::getAnalysisUsage(AU); + } + + bool runOnFunction(Function &F) override; + + DominatorTree *DT; + +public: + static char ID; + OptimizeReturned() : FunctionPass(ID), DT(nullptr) {} + + void visitCallSite(CallSite CS); +}; +} // End anonymous namespace + +char OptimizeReturned::ID = 0; +FunctionPass *llvm::createWebAssemblyOptimizeReturned() { + return new OptimizeReturned(); +} + +void OptimizeReturned::visitCallSite(CallSite CS) { + for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i) + if (CS.paramHasAttr(1 + i, Attribute::Returned)) { + Instruction *Inst = CS.getInstruction(); + Value *Arg = CS.getArgOperand(i); + // Ignore constants, globals, undef, etc. + if (isa<Constant>(Arg)) + continue; + // Like replaceDominatedUsesWith but using Instruction/Use dominance. + for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) { + Use &U = *UI++; + if (DT->dominates(Inst, U)) + U.set(Inst); + } + } +} + +bool OptimizeReturned::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + visit(F); + return true; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPEI.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPEI.cpp new file mode 100644 index 000000000000..d570d4266110 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPEI.cpp @@ -0,0 +1,1066 @@ +//===-- WebAssemblyPEI.cpp - Insert Prolog/Epilog code in function --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is responsible for finalizing the functions frame layout, saving +// callee saved registers, and for emitting prolog & epilog code for the +// function. +// +// This pass must be run after register allocation. After this pass is +// executed, it is illegal to construct MO_FrameIndex operands. +// +// This is a copy of lib/CodeGen/PrologEpilogInserter.cpp except that it does +// not assert that all virtual registers are gone (because WebAssembly currently +// uses virtual rather than physical registers), and only runs +// MRI.clearVirtRegs() if scavenging happened (which it never does). It also +// uses a different class name so it can be registered via INITIALIZE_PASS. +// It is otherwise unmodified, so any changes to the target-independent PEI +// can be easily applied. 
+//===----------------------------------------------------------------------===// + +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <climits> + +using namespace llvm; + +#define DEBUG_TYPE "pei" +namespace llvm { +void initializeWasmPEIPass(PassRegistry&); +} +namespace { +class WasmPEI : public MachineFunctionPass { +public: + static char ID; + WasmPEI() : MachineFunctionPass(ID) { + initializeWasmPEIPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract + /// frame indexes with appropriate references. + /// + bool runOnMachineFunction(MachineFunction &Fn) override; + +private: + RegScavenger *RS; + + // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved + // stack frame indexes. + unsigned MinCSFrameIndex, MaxCSFrameIndex; + + // Save and Restore blocks of the current function. Typically there is a + // single save block, unless Windows EH funclets are involved. + SmallVector<MachineBasicBlock *, 1> SaveBlocks; + SmallVector<MachineBasicBlock *, 4> RestoreBlocks; + + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. Set according to + // TRI->requiresFrameIndexScavenging() for the current function. 
+ bool FrameIndexVirtualScavenging; + + void calculateSets(MachineFunction &Fn); + void calculateCallsInformation(MachineFunction &Fn); + void assignCalleeSavedSpillSlots(MachineFunction &Fn, + const BitVector &SavedRegs); + void insertCSRSpillsAndRestores(MachineFunction &Fn); + void calculateFrameObjectOffsets(MachineFunction &Fn); + void replaceFrameIndices(MachineFunction &Fn); + void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj); + void scavengeFrameVirtualRegs(MachineFunction &Fn); + void insertPrologEpilogCode(MachineFunction &Fn); +}; +} // namespace + +char WasmPEI::ID = 0; + +namespace llvm { +FunctionPass *createWebAssemblyPEI() { + return new WasmPEI(); +} +} + +static cl::opt<unsigned> +WarnStackSize("wasm-warn-stack-size", cl::Hidden, cl::init((unsigned)-1), + cl::desc("Warn for stack size bigger than the given" + " number")); + +INITIALIZE_PASS_BEGIN(WasmPEI, "wasmprologepilog", + "Wasm Prologue/Epilogue Insertion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(StackProtector) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(WasmPEI, "wasmprologepilog", + "Wasm Prologue/Epilogue Insertion & Frame Finalization", + false, false) + +STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); +STATISTIC(NumBytesStackSpace, + "Number of bytes used for stack in all functions"); + +void WasmPEI::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<StackProtector>(); + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Compute the set of return blocks +void WasmPEI::calculateSets(MachineFunction &Fn) { + const MachineFrameInfo *MFI = Fn.getFrameInfo(); + + // Even when we do not change any CSR, we still want to insert the + // prologue and epilogue of the function. + // So set the save points for those. + + // Use the points found by shrink-wrapping, if any. + if (MFI->getSavePoint()) { + SaveBlocks.push_back(MFI->getSavePoint()); + assert(MFI->getRestorePoint() && "Both restore and save must be set"); + MachineBasicBlock *RestoreBlock = MFI->getRestorePoint(); + // If RestoreBlock does not have any successor and is not a return block + // then the end point is unreachable and we do not need to insert any + // epilogue. + if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) + RestoreBlocks.push_back(RestoreBlock); + return; + } + + // Save refs to entry and return blocks. + SaveBlocks.push_back(&Fn.front()); + for (MachineBasicBlock &MBB : Fn) { + if (MBB.isEHFuncletEntry()) + SaveBlocks.push_back(&MBB); + if (MBB.isReturnBlock()) + RestoreBlocks.push_back(&MBB); + } +} + +/// StackObjSet - A set of stack object indexes +typedef SmallSetVector<int, 8> StackObjSet; + +/// runOnMachineFunction - Insert prolog/epilog code and replace abstract +/// frame indexes with appropriate references. +/// +bool WasmPEI::runOnMachineFunction(MachineFunction &Fn) { + const Function* F = Fn.getFunction(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + + // LOCALMOD: assert removed from target-independent PEI + //assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); + + RS = TRI->requiresRegisterScavenging(Fn) ? 
new RegScavenger() : nullptr; + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); + + // Calculate the MaxCallFrameSize and AdjustsStack variables for the + // function's frame information. Also eliminates call frame pseudo + // instructions. + calculateCallsInformation(Fn); + + // Determine which of the registers in the callee save list should be saved. + BitVector SavedRegs; + TFI->determineCalleeSaves(Fn, SavedRegs, RS); + + // Insert spill code for any callee saved registers that are modified. + assignCalleeSavedSpillSlots(Fn, SavedRegs); + + // Determine placement of CSR spill/restore code: + // place all spills in the entry block, all restores in return blocks. + calculateSets(Fn); + + // Add the code to save and restore the callee saved registers. + if (!F->hasFnAttribute(Attribute::Naked)) + insertCSRSpillsAndRestores(Fn); + + // Allow the target machine to make final modifications to the function + // before the frame layout is finalized. + TFI->processFunctionBeforeFrameFinalized(Fn, RS); + + // Calculate actual frame offsets for all abstract stack objects... + calculateFrameObjectOffsets(Fn); + + // Add prolog and epilog code to the function. This function is required + // to align the stack frame as necessary for any stack variables or + // called functions. Because of this, calculateCalleeSavedRegisters() + // must be called before this function in order to set the AdjustsStack + // and MaxCallFrameSize variables. + if (!F->hasFnAttribute(Attribute::Naked)) + insertPrologEpilogCode(Fn); + + // Replace all MO_FrameIndex operands with physical register references + // and actual offsets. + // + replaceFrameIndices(Fn); + + // If register scavenging is needed, as we've enabled doing it as a + // post-pass, scavenge the virtual registers that frame index elimination + // inserted. + if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) { + scavengeFrameVirtualRegs(Fn); + // Clear any vregs created by virtual scavenging. + // LOCALMOD: made this call conditional with scavengeFrameVirtualregs() + Fn.getRegInfo().clearVirtRegs(); + } + + // Warn on stack size when we exceeds the given limit. + MachineFrameInfo *MFI = Fn.getFrameInfo(); + uint64_t StackSize = MFI->getStackSize(); + if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { + DiagnosticInfoStackSize DiagStackSize(*F, StackSize); + F->getContext().diagnose(DiagStackSize); + } + + delete RS; + SaveBlocks.clear(); + RestoreBlocks.clear(); + return true; +} + +/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack +/// variables for the function's frame information and eliminate call frame +/// pseudo instructions. +void WasmPEI::calculateCallsInformation(MachineFunction &Fn) { + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + + unsigned MaxCallFrameSize = 0; + bool AdjustsStack = MFI->adjustsStack(); + + // Get the function call frame set-up and tear-down instruction opcode + unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + + // Early exit for targets which have no call frame setup/destroy pseudo + // instructions. 
+ if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u) + return; + + std::vector<MachineBasicBlock::iterator> FrameSDOps; + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo" + " instructions should have a single immediate argument!"); + unsigned Size = I->getOperand(0).getImm(); + if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; + AdjustsStack = true; + FrameSDOps.push_back(I); + } else if (I->isInlineAsm()) { + // Some inline asm's need a stack frame, as indicated by operand 1. + unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + AdjustsStack = true; + } + + MFI->setAdjustsStack(AdjustsStack); + MFI->setMaxCallFrameSize(MaxCallFrameSize); + + for (std::vector<MachineBasicBlock::iterator>::iterator + i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { + MachineBasicBlock::iterator I = *i; + + // If call frames are not being included as part of the stack frame, and + // the target doesn't indicate otherwise, remove the call frame pseudos + // here. The sub/add sp instruction pairs are still inserted, but we don't + // need to track the SP adjustment for frame index elimination. + if (TFI->canSimplifyCallFramePseudos(Fn)) + TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); + } +} + +void WasmPEI::assignCalleeSavedSpillSlots(MachineFunction &F, + const BitVector &SavedRegs) { + // These are used to keep track the callee-save area. Initialize them. + MinCSFrameIndex = INT_MAX; + MaxCSFrameIndex = 0; + + if (SavedRegs.empty()) + return; + + const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F); + + std::vector<CalleeSavedInfo> CSI; + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + if (SavedRegs.test(Reg)) + CSI.push_back(CalleeSavedInfo(Reg)); + } + + const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); + MachineFrameInfo *MFI = F.getFrameInfo(); + if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) { + // If target doesn't implement this, use generic code. + + if (CSI.empty()) + return; // Early exit if no callee saved registers are modified! + + unsigned NumFixedSpillSlots; + const TargetFrameLowering::SpillSlot *FixedSpillSlots = + TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); + + // Now that we know which registers need to be saved and restored, allocate + // stack slots for them. + for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end(); + I != E; ++I) { + unsigned Reg = I->getReg(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } + + // Check to see if this physreg must be spilled to a particular stack slot + // on this target. + const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; + while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots && + FixedSlot->Reg != Reg) + ++FixedSlot; + + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { + // Nope, just spill it anywhere convenient. 
+ unsigned Align = RC->getAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. + Align = std::min(Align, StackAlign); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); + if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; + if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; + } else { + // Spill it to the stack where we must. + FrameIdx = + MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + } + + I->setFrameIdx(FrameIdx); + } + } + + MFI->setCalleeSavedInfo(CSI); +} + +/// Helper function to update the liveness information for the callee-saved +/// registers. +static void updateLiveness(MachineFunction &MF) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + // Visited will contain all the basic blocks that are in the region + // where the callee saved registers are alive: + // - Anything that is not Save or Restore -> LiveThrough. + // - Save -> LiveIn. + // - Restore -> LiveOut. + // The live-out is not attached to the block, so no need to keep + // Restore in this set. + SmallPtrSet<MachineBasicBlock *, 8> Visited; + SmallVector<MachineBasicBlock *, 8> WorkList; + MachineBasicBlock *Entry = &MF.front(); + MachineBasicBlock *Save = MFI->getSavePoint(); + + if (!Save) + Save = Entry; + + if (Entry != Save) { + WorkList.push_back(Entry); + Visited.insert(Entry); + } + Visited.insert(Save); + + MachineBasicBlock *Restore = MFI->getRestorePoint(); + if (Restore) + // By construction Restore cannot be visited, otherwise it + // means there exists a path to Restore that does not go + // through Save. + WorkList.push_back(Restore); + + while (!WorkList.empty()) { + const MachineBasicBlock *CurBB = WorkList.pop_back_val(); + // By construction, the region that is after the save point is + // dominated by the Save and post-dominated by the Restore. + if (CurBB == Save && Save != Restore) + continue; + // Enqueue all the successors not already visited. + // Those are by construction either before Save or after Restore. + for (MachineBasicBlock *SuccBB : CurBB->successors()) + if (Visited.insert(SuccBB).second) + WorkList.push_back(SuccBB); + } + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + for (MachineBasicBlock *MBB : Visited) { + MCPhysReg Reg = CSI[i].getReg(); + // Add the callee-saved register as live-in. + // It's killed at the spill. + if (!MBB->isLiveIn(Reg)) + MBB->addLiveIn(Reg); + } + } +} + +/// insertCSRSpillsAndRestores - Insert spill and restore code for +/// callee saved registers used in the function. +/// +void WasmPEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { + // Get callee saved register information. + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + MFI->setCalleeSavedInfoValid(true); + + // Early exit if no callee saved registers are modified! + if (CSI.empty()) + return; + + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + MachineBasicBlock::iterator I; + + // Spill using target interface. 
+ for (MachineBasicBlock *SaveBlock : SaveBlocks) { + I = SaveBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Insert the spill to the stack frame. + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), + RC, TRI); + } + } + // Update the live-in information of all the blocks up to the save point. + updateLiveness(Fn); + } + + // Restore using target interface. + for (MachineBasicBlock *MBB : RestoreBlocks) { + I = MBB->end(); + + // Skip over all terminator instructions, which are part of the return + // sequence. + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->isTerminator()) + I = I2; + + bool AtStart = I == MBB->begin(); + MachineBasicBlock::iterator BeforeI = I; + if (!AtStart) + --BeforeI; + + // Restore all registers immediately before the return and any + // terminators that precede it. + if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } + } + } + } +} + +/// AdjustStackOffset - Helper function used to adjust the stack frame offset. +static inline void +AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, + bool StackGrowsDown, int64_t &Offset, + unsigned &MaxAlign, unsigned Skew) { + // If the stack grows down, add the object size to find the lowest address. + if (StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + + // If the alignment of this object is greater than that of the stack, then + // increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + + // Adjust to alignment boundary. + Offset = RoundUpToAlignment(Offset, Align, Skew); + + if (StackGrowsDown) { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + } else { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, Offset); + Offset += MFI->getObjectSize(FrameIdx); + } +} + +/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., +/// those required to be close to the Stack Protector) to stack offsets. +static void +AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo *MFI, bool StackGrowsDown, + int64_t &Offset, unsigned &MaxAlign, unsigned Skew) { + + for (StackObjSet::const_iterator I = UnassignedObjs.begin(), + E = UnassignedObjs.end(); I != E; ++I) { + int i = *I; + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew); + ProtectedObjs.insert(i); + } +} + +/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the +/// abstract stack objects. 
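+///
+/// Offsets accumulate in the direction of stack growth and are rounded up to
+/// each object's alignment (see AdjustStackOffset above). For example, with a
+/// downward-growing stack, a 4-byte object placed at offset 4 followed by an
+/// 8-byte object with 8-byte alignment puts the second object 16 bytes below
+/// the start of the local area.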
+/// +void WasmPEI::calculateFrameObjectOffsets(MachineFunction &Fn) { + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); + StackProtector *SP = &getAnalysis<StackProtector>(); + + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + + // Loop over all of the stack objects, assigning sequential addresses... + MachineFrameInfo *MFI = Fn.getFrameInfo(); + + // Start at the beginning of the local area. + // The Offset is the distance from the stack top in the direction + // of stack growth -- so it's always nonnegative. + int LocalAreaOffset = TFI.getOffsetOfLocalArea(); + if (StackGrowsDown) + LocalAreaOffset = -LocalAreaOffset; + assert(LocalAreaOffset >= 0 + && "Local area offset should be in direction of stack growth"); + int64_t Offset = LocalAreaOffset; + + // Skew to be applied to alignment. + unsigned Skew = TFI.getStackAlignmentSkew(Fn); + + // If there are fixed sized objects that are preallocated in the local area, + // non-fixed objects can't be allocated right at the start of local area. + // We currently don't support filling in holes in between fixed sized + // objects, so we adjust 'Offset' to point to the end of last fixed sized + // preallocated object. + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { + int64_t FixedOff; + if (StackGrowsDown) { + // The maximum distance from the stack pointer is at lower address of + // the object -- which is given by offset. For down growing stack + // the offset is negative, so we negate the offset to get the distance. + FixedOff = -MFI->getObjectOffset(i); + } else { + // The maximum distance from the start pointer is at the upper + // address of the object. + FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); + } + if (FixedOff > Offset) Offset = FixedOff; + } + + // First assign frame offsets to stack objects that are used to spill + // callee saved registers. + if (StackGrowsDown) { + for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { + // If the stack grows down, we need to add the size to find the lowest + // address of the object. + Offset += MFI->getObjectSize(i); + + unsigned Align = MFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = RoundUpToAlignment(Offset, Align, Skew); + + MFI->setObjectOffset(i, -Offset); // Set the computed offset + } + } else { + int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; + for (int i = MaxCSFI; i >= MinCSFI ; --i) { + unsigned Align = MFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = RoundUpToAlignment(Offset, Align, Skew); + + MFI->setObjectOffset(i, Offset); + Offset += MFI->getObjectSize(i); + } + } + + unsigned MaxAlign = MFI->getMaxAlignment(); + + // Make sure the special register scavenging spill slot is closest to the + // incoming stack pointer if a frame pointer is required and is closer + // to the incoming rather than the final stack pointer. 
+ const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo(); + bool EarlyScavengingSlots = (TFI.hasFP(Fn) && + TFI.isFPCloseToIncomingSP() && + RegInfo->useFPForScavengingIndex(Fn) && + !RegInfo->needsStackRealignment(Fn)); + if (RS && EarlyScavengingSlots) { + SmallVector<int, 2> SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVectorImpl<int>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); + } + + // FIXME: Once this is working, then enable flag will change to a target + // check for whether the frame is large enough to want to use virtual + // frame index registers. Functions which don't want/need this optimization + // will continue to use the existing code path. + if (MFI->getUseLocalStackAllocationBlock()) { + unsigned Align = MFI->getLocalFrameMaxAlign(); + + // Adjust to alignment boundary. + Offset = RoundUpToAlignment(Offset, Align, Skew); + + DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); + + // Resolve offsets for objects in the local block. + for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { + std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i); + int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; + DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << + FIOffset << "]\n"); + MFI->setObjectOffset(Entry.first, FIOffset); + } + // Allocate the local block + Offset += MFI->getLocalFrameSize(); + + MaxAlign = std::max(Align, MaxAlign); + } + + // Make sure that the stack protector comes before the local variables on the + // stack. + SmallSet<int, 16> ProtectedObjs; + if (MFI->getStackProtectorIndex() >= 0) { + StackObjSet LargeArrayObjs; + StackObjSet SmallArrayObjs; + StackObjSet AddrOfObjs; + + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, + Offset, MaxAlign, Skew); + + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && RS->isScavengingFrameIndex((int)i)) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + + switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + case StackProtector::SSPLK_None: + continue; + case StackProtector::SSPLK_SmallArray: + SmallArrayObjs.insert(i); + continue; + case StackProtector::SSPLK_AddrOf: + AddrOfObjs.insert(i); + continue; + case StackProtector::SSPLK_LargeArray: + LargeArrayObjs.insert(i); + continue; + } + llvm_unreachable("Unexpected SSPLayoutKind."); + } + + AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign, Skew); + AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign, Skew); + AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign, Skew); + } + + // Then assign frame offsets to stack objects that are not used to spill + // callee saved registers. 
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && RS->isScavengingFrameIndex((int)i)) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (ProtectedObjs.count(i)) + continue; + + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew); + } + + // Make sure the special register scavenging spill slot is closest to the + // stack pointer. + if (RS && !EarlyScavengingSlots) { + SmallVector<int, 2> SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVectorImpl<int>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); + } + + if (!TFI.targetHandlesStackFrameRounding()) { + // If we have reserved argument space for call sites in the function + // immediately on entry to the current function, count it as part of the + // overall stack size. + if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) + Offset += MFI->getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) + StackAlign = TFI.getStackAlignment(); + else + StackAlign = TFI.getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + Offset = RoundUpToAlignment(Offset, StackAlign, Skew); + } + + // Update frame info to pretend that this is part of the stack... + int64_t StackSize = Offset - LocalAreaOffset; + MFI->setStackSize(StackSize); + NumBytesStackSpace += StackSize; +} + +/// insertPrologEpilogCode - Scan the function for modified callee saved +/// registers, insert spill code for these callee saved registers, then add +/// prolog and epilog code to the function. +/// +void WasmPEI::insertPrologEpilogCode(MachineFunction &Fn) { + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); + + // Add prologue to the function... + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.emitPrologue(Fn, *SaveBlock); + + // Add epilogue to restore the callee-save registers in each exiting block. + for (MachineBasicBlock *RestoreBlock : RestoreBlocks) + TFI.emitEpilogue(Fn, *RestoreBlock); + + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.inlineStackProbe(Fn, *SaveBlock); + + // Emit additional code that is required to support segmented stacks, if + // we've been asked for it. This, when linked with a runtime with support + // for segmented stacks (libgcc is one), will result in allocating stack + // space in small chunks instead of one large contiguous block. + if (Fn.shouldSplitStack()) { + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.adjustForSegmentedStacks(Fn, *SaveBlock); + } + + // Emit additional code that is required to explicitly handle the stack in + // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. 
The + // approach is rather similar to that of Segmented Stacks, but it uses a + // different conditional check and another BIF for allocating more stack + // space. + if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.adjustForHiPEPrologue(Fn, *SaveBlock); +} + +/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical +/// register references and actual offsets. +/// +void WasmPEI::replaceFrameIndices(MachineFunction &Fn) { + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); + if (!TFI.needsFrameIndexResolution(Fn)) return; + + // Store SPAdj at exit of a basic block. + SmallVector<int, 8> SPState; + SPState.resize(Fn.getNumBlockIDs()); + SmallPtrSet<MachineBasicBlock*, 8> Reachable; + + // Iterate over the reachable blocks in DFS order. + for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); + DFI != DFE; ++DFI) { + int SPAdj = 0; + // Check the exit state of the DFS stack predecessor. + if (DFI.getPathLength() >= 2) { + MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2); + assert(Reachable.count(StackPred) && + "DFS stack predecessor is already visited.\n"); + SPAdj = SPState[StackPred->getNumber()]; + } + MachineBasicBlock *BB = *DFI; + replaceFrameIndices(BB, Fn, SPAdj); + SPState[BB->getNumber()] = SPAdj; + } + + // Handle the unreachable blocks. + for (auto &BB : Fn) { + if (Reachable.count(&BB)) + // Already handled in DFS traversal. + continue; + int SPAdj = 0; + replaceFrameIndices(&BB, Fn, SPAdj); + } +} + +void WasmPEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj) { + assert(Fn.getSubtarget().getRegisterInfo() && + "getRegisterInfo() must be implemented!"); + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + + bool InsideCallSequence = false; + + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + InsideCallSequence = (I->getOpcode() == FrameSetupOpcode); + SPAdj += TII.getSPAdjust(I); + + MachineBasicBlock::iterator PrevI = BB->end(); + if (I != BB->begin()) PrevI = std::prev(I); + TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); + + // Visit the instructions created by eliminateCallFramePseudoInstr(). + if (PrevI == BB->end()) + I = BB->begin(); // The replaced instr was the first in the block. + else + I = std::next(PrevI); + continue; + } + + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (!MI->getOperand(i).isFI()) + continue; + + // Frame indices in debug values are encoded in a target independent + // way with simply the frame index and offset rather than any + // target-specific addressing mode. 
+ if (MI->isDebugValue()) { + assert(i == 0 && "Frame indices can only appear as the first " + "operand of a DBG_VALUE machine instruction"); + unsigned Reg; + MachineOperand &Offset = MI->getOperand(1); + Offset.setImm(Offset.getImm() + + TFI->getFrameIndexReference( + Fn, MI->getOperand(0).getIndex(), Reg)); + MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/); + continue; + } + + // TODO: This code should be commoned with the code for + // PATCHPOINT. There's no good reason for the difference in + // implementation other than historical accident. The only + // remaining difference is the unconditional use of the stack + // pointer as the base register. + if (MI->getOpcode() == TargetOpcode::STATEPOINT) { + assert((!MI->isDebugValue() || i == 0) && + "Frame indicies can only appear as the first operand of a " + "DBG_VALUE machine instruction"); + unsigned Reg; + MachineOperand &Offset = MI->getOperand(i + 1); + const unsigned refOffset = + TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(), + Reg); + + Offset.setImm(Offset.getImm() + refOffset); + MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/); + continue; + } + + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, i, + FrameIndexVirtualScavenging ? nullptr : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; + } + + MI = nullptr; + break; + } + + // If we are looking at a call sequence, we need to keep track of + // the SP adjustment made by each instruction in the sequence. + // This includes both the frame setup/destroy pseudos (handled above), + // as well as other instructions that have side effects w.r.t the SP. + // Note that this must come after eliminateFrameIndex, because + // if I itself referred to a frame index, we shouldn't count its own + // adjustment. + if (MI && InsideCallSequence) + SPAdj += TII.getSPAdjust(MI); + + if (DoIncr && I != BB->end()) ++I; + + // Update register states. + if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); + } +} + +/// scavengeFrameVirtualRegs - Replace all frame index virtual registers +/// with physical registers. Use the register scavenger to find an +/// appropriate register to use. +/// +/// FIXME: Iterating over the instruction stream is unnecessary. We can simply +/// iterate over the vreg use list, which at this point only contains machine +/// operands for which eliminateFrameIndex need a new scratch reg. +void +WasmPEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { + // Run through the instructions and find any virtual registers. + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { + RS->enterBasicBlock(&*BB); + + int SPAdj = 0; + + // The instruction stream may change in the loop, so check BB->end() + // directly. 
+  for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+    // We might end up here again with a NULL iterator if we scavenged a
+    // register for which we inserted spill code for definition by what was
+    // originally the first instruction in BB.
+    if (I == MachineBasicBlock::iterator(nullptr))
+      I = BB->begin();
+
+    MachineInstr *MI = I;
+    MachineBasicBlock::iterator J = std::next(I);
+    MachineBasicBlock::iterator P =
+                         I == BB->begin() ? MachineBasicBlock::iterator(nullptr)
+                                          : std::prev(I);
+
+    // RS should process this instruction before we might scavenge at this
+    // location. This is because we might be replacing a virtual register
+    // defined by this instruction, and if so, registers killed by this
+    // instruction are available, and defined registers are not.
+    RS->forward(I);
+
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      if (MI->getOperand(i).isReg()) {
+        MachineOperand &MO = MI->getOperand(i);
+        unsigned Reg = MO.getReg();
+        if (Reg == 0)
+          continue;
+        if (!TargetRegisterInfo::isVirtualRegister(Reg))
+          continue;
+
+        // When we first encounter a new virtual register, it
+        // must be a definition.
+        assert(MI->getOperand(i).isDef() &&
+               "frame index virtual missing def!");
+        // Scavenge a new scratch register
+        const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+        unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+        ++NumScavengedRegs;
+
+        // Replace this reference to the virtual register with the
+        // scratch register.
+        assert (ScratchReg && "Missing scratch register!");
+        Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
+
+        // Because this instruction was processed by the RS before this
+        // register was allocated, make sure that the RS now records the
+        // register as being used.
+        RS->setRegUsed(ScratchReg);
+      }
+    }
+
+    // If the scavenger needed to use one of its spill slots, the
+    // spill code will have been inserted in between I and J. This is a
+    // problem because we need the spill code before I: Move I to just
+    // prior to J.
+    if (I != std::prev(J)) {
+      BB->splice(J, &*BB, I);
+
+      // Before we move I, we need to prepare the RS to visit I again.
+      // Specifically, RS will assert if it sees uses of registers that
+      // it believes are undefined. Because we have already processed
+      // register kills in I, when it visits I again, it will believe that
+      // those registers are undefined. To avoid this situation, unprocess
+      // the instruction I.
+      assert(RS->getCurrentPosition() == I &&
+             "The register scavenger has an unexpected position");
+      I = P;
+      RS->unprocess(P);
+    } else
+      ++I;
+  }
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
new file mode 100644
index 000000000000..4ad6eed7385b
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -0,0 +1,86 @@
+//===-- WebAssemblyPeephole.cpp - WebAssembly Peephole Optimizations ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Late peephole optimizations for WebAssembly.
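+///
+/// For example, when a store's result register ends up being the same as its
+/// value operand, the def is rewritten to a fresh dead register so that it
+/// can be printed as $discard rather than tying up a numbered register (see
+/// the STORE_* cases below).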
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-peephole" + +namespace { +class WebAssemblyPeephole final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly late peephole optimizer"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; + WebAssemblyPeephole() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyPeephole::ID = 0; +FunctionPass *llvm::createWebAssemblyPeephole() { + return new WebAssemblyPeephole(); +} + +bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + + MachineRegisterInfo &MRI = MF.getRegInfo(); + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + + for (auto &MBB : MF) + for (auto &MI : MBB) + switch (MI.getOpcode()) { + default: + break; + case WebAssembly::STORE8_I32: + case WebAssembly::STORE16_I32: + case WebAssembly::STORE8_I64: + case WebAssembly::STORE16_I64: + case WebAssembly::STORE32_I64: + case WebAssembly::STORE_F32: + case WebAssembly::STORE_F64: + case WebAssembly::STORE_I32: + case WebAssembly::STORE_I64: { + // Store instructions return their value operand. If we ended up using + // the same register for both, replace it with a dead def so that it + // can use $discard instead. + MachineOperand &MO = MI.getOperand(0); + unsigned OldReg = MO.getReg(); + // TODO: Handle SP/physregs + if (OldReg == MI.getOperand(3).getReg() + && TargetRegisterInfo::isVirtualRegister(MI.getOperand(3).getReg())) { + Changed = true; + unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + MO.setReg(NewReg); + MO.setIsDead(); + MFI.stackifyVReg(NewReg); + MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg); + } + } + } + + return Changed; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp new file mode 100644 index 000000000000..9ec66595d8da --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -0,0 +1,175 @@ +//===-- WebAssemblyRegColoring.cpp - Register coloring --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a virtual register coloring pass. +/// +/// WebAssembly doesn't have a fixed number of registers, but it is still +/// desirable to minimize the total number of registers used in each function. +/// +/// This code is modeled after lib/CodeGen/StackSlotColoring.cpp. 
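+///
+/// For example, two virtual registers of the same register class whose live
+/// intervals never overlap can be assigned the same color and thus share a
+/// single WebAssembly register instead of occupying two.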
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-reg-coloring" + +namespace { +class WebAssemblyRegColoring final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyRegColoring() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { + return "WebAssembly Register Coloring"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<LiveIntervals>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +private: +}; +} // end anonymous namespace + +char WebAssemblyRegColoring::ID = 0; +FunctionPass *llvm::createWebAssemblyRegColoring() { + return new WebAssemblyRegColoring(); +} + +// Compute the total spill weight for VReg. +static float computeWeight(const MachineRegisterInfo *MRI, + const MachineBlockFrequencyInfo *MBFI, + unsigned VReg) { + float weight = 0.0f; + for (MachineOperand &MO : MRI->reg_nodbg_operands(VReg)) + weight += LiveIntervals::getSpillWeight(MO.isDef(), MO.isUse(), MBFI, + MO.getParent()); + return weight; +} + +bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Register Coloring **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + // If there are calls to setjmp or sigsetjmp, don't perform coloring. Virtual + // registers could be modified before the longjmp is executed, resulting in + // the wrong value being used afterwards. (See <rdar://problem/8007500>.) + // TODO: Does WebAssembly need to care about setjmp for register coloring? + if (MF.exposesReturnsTwice()) + return false; + + MachineRegisterInfo *MRI = &MF.getRegInfo(); + LiveIntervals *Liveness = &getAnalysis<LiveIntervals>(); + const MachineBlockFrequencyInfo *MBFI = + &getAnalysis<MachineBlockFrequencyInfo>(); + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + + // Gather all register intervals into a list and sort them. + unsigned NumVRegs = MRI->getNumVirtRegs(); + SmallVector<LiveInterval *, 0> SortedIntervals; + SortedIntervals.reserve(NumVRegs); + + DEBUG(dbgs() << "Interesting register intervals:\n"); + for (unsigned i = 0; i < NumVRegs; ++i) { + unsigned VReg = TargetRegisterInfo::index2VirtReg(i); + if (MFI.isVRegStackified(VReg)) + continue; + // Skip unused registers, which can use $discard. + if (MRI->use_empty(VReg)) + continue; + + LiveInterval *LI = &Liveness->getInterval(VReg); + assert(LI->weight == 0.0f); + LI->weight = computeWeight(MRI, MBFI, VReg); + DEBUG(LI->dump()); + SortedIntervals.push_back(LI); + } + DEBUG(dbgs() << '\n'); + + // Sort them to put arguments first (since we don't want to rename live-in + // registers), by weight next, and then by position. + // TODO: Investigate more intelligent sorting heuristics. For starters, we + // should try to coalesce adjacent live intervals before non-adjacent ones. 
+ std::sort(SortedIntervals.begin(), SortedIntervals.end(), + [MRI](LiveInterval *LHS, LiveInterval *RHS) { + if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg)) + return MRI->isLiveIn(LHS->reg); + if (LHS->weight != RHS->weight) + return LHS->weight > RHS->weight; + if (LHS->empty() || RHS->empty()) + return !LHS->empty() && RHS->empty(); + return *LHS < *RHS; + }); + + DEBUG(dbgs() << "Coloring register intervals:\n"); + SmallVector<unsigned, 16> SlotMapping(SortedIntervals.size(), -1u); + SmallVector<SmallVector<LiveInterval *, 4>, 16> Assignments( + SortedIntervals.size()); + BitVector UsedColors(SortedIntervals.size()); + bool Changed = false; + for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) { + LiveInterval *LI = SortedIntervals[i]; + unsigned Old = LI->reg; + size_t Color = i; + const TargetRegisterClass *RC = MRI->getRegClass(Old); + + // Check if it's possible to reuse any of the used colors. + if (!MRI->isLiveIn(Old)) + for (int C(UsedColors.find_first()); C != -1; + C = UsedColors.find_next(C)) { + if (MRI->getRegClass(SortedIntervals[C]->reg) != RC) + continue; + for (LiveInterval *OtherLI : Assignments[C]) + if (!OtherLI->empty() && OtherLI->overlaps(*LI)) + goto continue_outer; + Color = C; + break; + continue_outer:; + } + + unsigned New = SortedIntervals[Color]->reg; + SlotMapping[i] = New; + Changed |= Old != New; + UsedColors.set(Color); + Assignments[Color].push_back(LI); + DEBUG(dbgs() << "Assigning vreg" + << TargetRegisterInfo::virtReg2Index(LI->reg) << " to vreg" + << TargetRegisterInfo::virtReg2Index(New) << "\n"); + } + if (!Changed) + return false; + + // Rewrite register operands. + for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) { + unsigned Old = SortedIntervals[i]->reg; + unsigned New = SlotMapping[i]; + if (Old != New) + MRI->replaceRegWith(Old, New); + } + return true; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp new file mode 100644 index 000000000000..f621db070b5b --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -0,0 +1,109 @@ +//===-- WebAssemblyRegNumbering.cpp - Register Numbering ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a pass which assigns WebAssembly register +/// numbers for CodeGen virtual registers. 
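+///
+/// For example, in a function with two incoming arguments, the ARGUMENT
+/// virtual registers are mapped to indices 0 and 1, the remaining used
+/// virtual registers are numbered sequentially after them, and stackified
+/// registers are tagged with INT32_MIN rather than given a local number.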
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-reg-numbering" + +namespace { +class WebAssemblyRegNumbering final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly Register Numbering"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyRegNumbering() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyRegNumbering::ID = 0; +FunctionPass *llvm::createWebAssemblyRegNumbering() { + return new WebAssemblyRegNumbering(); +} + +bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** Register Numbering **********\n" + "********** Function: " + << MF.getName() << '\n'); + + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const MachineFrameInfo &FrameInfo = *MF.getFrameInfo(); + + MFI.initWARegs(); + + // WebAssembly argument registers are in the same index space as local + // variables. Assign the numbers for them first. + MachineBasicBlock &EntryMBB = MF.front(); + for (MachineInstr &MI : EntryMBB) { + switch (MI.getOpcode()) { + case WebAssembly::ARGUMENT_I32: + case WebAssembly::ARGUMENT_I64: + case WebAssembly::ARGUMENT_F32: + case WebAssembly::ARGUMENT_F64: + MFI.setWAReg(MI.getOperand(0).getReg(), MI.getOperand(1).getImm()); + break; + default: + break; + } + } + + // Then assign regular WebAssembly registers for all remaining used + // virtual registers. TODO: Consider sorting the registers by frequency of + // use, to maximize usage of small immediate fields. + unsigned NumArgRegs = MFI.getParams().size(); + unsigned NumVRegs = MF.getRegInfo().getNumVirtRegs(); + unsigned NumStackRegs = 0; + unsigned CurReg = 0; + for (unsigned VRegIdx = 0; VRegIdx < NumVRegs; ++VRegIdx) { + unsigned VReg = TargetRegisterInfo::index2VirtReg(VRegIdx); + // Handle stackified registers. + if (MFI.isVRegStackified(VReg)) { + MFI.setWAReg(VReg, INT32_MIN | NumStackRegs++); + continue; + } + // Skip unused registers. 
+ if (MRI.use_empty(VReg)) + continue; + if (MFI.getWAReg(VReg) == WebAssemblyFunctionInfo::UnusedReg) + MFI.setWAReg(VReg, NumArgRegs + CurReg++); + } + // Allocate locals for used physical registers + if (FrameInfo.getStackSize() > 0) + MFI.addPReg(WebAssembly::SP32, CurReg++); + + return true; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp new file mode 100644 index 000000000000..89ef5cdb2bef --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -0,0 +1,265 @@ +//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a register stacking pass. +/// +/// This pass reorders instructions to put register uses and defs in an order +/// such that they form single-use expression trees. Registers fitting this form +/// are then marked as "stackified", meaning references to them are replaced by +/// "push" and "pop" from the stack. +/// +/// This is primarily a code size optimization, since temporary values on the +/// expression don't need to be named. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-reg-stackify" + +namespace { +class WebAssemblyRegStackify final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly Register Stackify"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreservedID(MachineDominatorsID); + AU.addPreservedID(LiveVariablesID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyRegStackify() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyRegStackify::ID = 0; +FunctionPass *llvm::createWebAssemblyRegStackify() { + return new WebAssemblyRegStackify(); +} + +// Decorate the given instruction with implicit operands that enforce the +// expression stack ordering constraints for an instruction which is on +// the expression stack. +static void ImposeStackOrdering(MachineInstr *MI) { + // Write the opaque EXPR_STACK register. + if (!MI->definesRegister(WebAssembly::EXPR_STACK)) + MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK, + /*isDef=*/true, + /*isImp=*/true)); + + // Also read the opaque EXPR_STACK register. 
+ if (!MI->readsRegister(WebAssembly::EXPR_STACK)) + MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK, + /*isDef=*/false, + /*isImp=*/true)); +} + +// Test whether it's safe to move Def to just before Insert. +// TODO: Compute memory dependencies in a way that doesn't require always +// walking the block. +// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be +// more precise. +static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, + AliasAnalysis &AA, LiveIntervals &LIS, + MachineRegisterInfo &MRI) { + assert(Def->getParent() == Insert->getParent()); + bool SawStore = false, SawSideEffects = false; + MachineBasicBlock::const_iterator D(Def), I(Insert); + + // Check for register dependencies. + for (const MachineOperand &MO : Def->operands()) { + if (!MO.isReg() || MO.isUndef()) + continue; + unsigned Reg = MO.getReg(); + + // If the register is dead here and at Insert, ignore it. + if (MO.isDead() && Insert->definesRegister(Reg) && + !Insert->readsRegister(Reg)) + continue; + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // If the physical register is never modified, ignore it. + if (!MRI.isPhysRegModified(Reg)) + continue; + // Otherwise, it's a physical register with unknown liveness. + return false; + } + + // Ask LiveIntervals whether moving this virtual register use or def to + // Insert will change value numbers are seen. + const LiveInterval &LI = LIS.getInterval(Reg); + VNInfo *DefVNI = MO.isDef() ? + LI.getVNInfoAt(LIS.getInstructionIndex(Def).getRegSlot()) : + LI.getVNInfoBefore(LIS.getInstructionIndex(Def)); + assert(DefVNI && "Instruction input missing value number"); + VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(Insert)); + if (InsVNI && DefVNI != InsVNI) + return false; + } + + // Check for memory dependencies and side effects. + for (--I; I != D; --I) + SawSideEffects |= I->isSafeToMove(&AA, SawStore); + return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) && + !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore)); +} + +bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** Register Stackifying **********\n" + "********** Function: " + << MF.getName() << '\n'); + + bool Changed = false; + MachineRegisterInfo &MRI = MF.getRegInfo(); + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + LiveIntervals &LIS = getAnalysis<LiveIntervals>(); + + // Walk the instructions from the bottom up. Currently we don't look past + // block boundaries, and the blocks aren't ordered so the block visitation + // order isn't significant, but we may want to change this in the future. + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : reverse(MBB)) { + MachineInstr *Insert = &MI; + // Don't nest anything inside a phi. + if (Insert->getOpcode() == TargetOpcode::PHI) + break; + + // Don't nest anything inside an inline asm, because we don't have + // constraints for $push inputs. + if (Insert->getOpcode() == TargetOpcode::INLINEASM) + break; + + // Iterate through the inputs in reverse order, since we'll be pulling + // operands off the stack in LIFO order. + bool AnyStackified = false; + for (MachineOperand &Op : reverse(Insert->uses())) { + // We're only interested in explicit virtual register operands. 
+        if (!Op.isReg() || Op.isImplicit() || !Op.isUse())
+          continue;
+
+        unsigned Reg = Op.getReg();
+
+        // Only consider registers with a single definition.
+        // TODO: Eventually we may relax this, to stackify phi transfers.
+        MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+        if (!Def)
+          continue;
+
+        // There's no use in nesting implicit defs inside anything.
+        if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF)
+          continue;
+
+        // Don't nest an INLINE_ASM def into anything, because we don't have
+        // constraints for $pop outputs.
+        if (Def->getOpcode() == TargetOpcode::INLINEASM)
+          continue;
+
+        // Don't nest PHIs inside of anything.
+        if (Def->getOpcode() == TargetOpcode::PHI)
+          continue;
+
+        // Argument instructions represent live-in registers and not real
+        // instructions.
+        if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
+            Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
+            Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
+            Def->getOpcode() == WebAssembly::ARGUMENT_F64)
+          continue;
+
+        // Single-use expression trees require defs that have one use.
+        // TODO: Eventually we'll relax this, to take advantage of set_local
+        // returning its result.
+        if (!MRI.hasOneUse(Reg))
+          continue;
+
+        // For now, be conservative and don't look across block boundaries.
+        // TODO: Be more aggressive?
+        if (Def->getParent() != &MBB)
+          continue;
+
+        // Don't move instructions that have side effects or memory dependencies
+        // or other complications.
+        if (!IsSafeToMove(Def, Insert, AA, LIS, MRI))
+          continue;
+
+        Changed = true;
+        AnyStackified = true;
+        // Move the def down and nest it in the current instruction.
+        MBB.splice(Insert, &MBB, Def);
+        LIS.handleMove(Def);
+        MFI.stackifyVReg(Reg);
+        ImposeStackOrdering(Def);
+        Insert = Def;
+      }
+      if (AnyStackified)
+        ImposeStackOrdering(&MI);
+    }
+  }
+
+  // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere
+  // so that it never looks like a use-before-def.
+  if (Changed) {
+    MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
+    for (MachineBasicBlock &MBB : MF)
+      MBB.addLiveIn(WebAssembly::EXPR_STACK);
+  }
+
+#ifndef NDEBUG
+  // Verify that pushes and pops are performed in LIFO order.
+  SmallVector<unsigned, 0> Stack;
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      for (MachineOperand &MO : reverse(MI.explicit_operands())) {
+        if (!MO.isReg())
+          continue;
+        unsigned VReg = MO.getReg();
+
+        // Don't stackify physregs like SP or FP.
+        if (!TargetRegisterInfo::isVirtualRegister(VReg))
+          continue;
+
+        if (MFI.isVRegStackified(VReg)) {
+          if (MO.isDef())
+            Stack.push_back(VReg);
+          else
+            assert(Stack.pop_back_val() == VReg);
+        }
+      }
+    }
+    // TODO: Generalize this code to support keeping values on the stack across
+    // basic block boundaries.
+ assert(Stack.empty()); + } +#endif + + return Changed; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index 385c40bf6693..dcada45f96d1 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -43,7 +43,7 @@ WebAssemblyRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const { } BitVector -WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const { +WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction & /*MF*/) const { BitVector Reserved(getNumRegs()); for (auto Reg : {WebAssembly::SP32, WebAssembly::SP64, WebAssembly::FP32, WebAssembly::FP64}) @@ -52,9 +52,37 @@ WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } void WebAssemblyRegisterInfo::eliminateFrameIndex( - MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS) const { - llvm_unreachable("WebAssemblyRegisterInfo::eliminateFrameIndex"); // FIXME + MachineBasicBlock::iterator II, int SPAdj, + unsigned FIOperandNum, RegScavenger * /*RS*/) const { + assert(SPAdj == 0); + MachineInstr &MI = *II; + + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + const MachineFrameInfo& MFI = *MF.getFrameInfo(); + int FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex); + + if (MI.mayLoadOrStore()) { + // If this is a load or store, make it relative to SP and fold the frame + // offset directly in + assert(MI.getOperand(1).getImm() == 0 && + "Can't eliminate FI yet if offset is already set"); + MI.getOperand(1).setImm(FrameOffset); + MI.getOperand(2).ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); + } else { + // Otherwise create an i32.add SP, offset and make it the operand + auto &MRI = MF.getRegInfo(); + const auto *TII = MF.getSubtarget().getInstrInfo(); + + unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::CONST_I32), OffsetReg) + .addImm(FrameOffset); + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::ADD_I32), OffsetReg) + .addReg(WebAssembly::SP32) + .addReg(OffsetReg); + MI.getOperand(FIOperandNum).ChangeToRegister(OffsetReg, /*IsDef=*/false); + } } unsigned @@ -67,21 +95,11 @@ WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return Regs[TFI->hasFP(MF)][TT.isArch64Bit()]; } -bool WebAssemblyRegisterInfo::canRealignStack(const MachineFunction &MF) const { - return !MF.getFunction()->hasFnAttribute("no-realign-stack"); -} - -// FIXME: share this with other backends with identical implementation? 
-bool WebAssemblyRegisterInfo::needsStackRealignment( - const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const WebAssemblyFrameLowering *TFI = getFrameLowering(MF); - const Function *F = MF.getFunction(); - unsigned StackAlign = TFI->getStackAlignment(); - bool requiresRealignment = - ((MFI->getMaxAlignment() > StackAlign) || - F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackAlignment)); - - return requiresRealignment && canRealignStack(MF); +const TargetRegisterClass * +WebAssemblyRegisterInfo::getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const { + assert(Kind == 0 && "Only one kind of pointer on WebAssembly"); + if (MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) + return &WebAssembly::I64RegClass; + return &WebAssembly::I32RegClass; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h index dbdb9d0457af..ad1d71eebf22 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h @@ -42,9 +42,9 @@ public: // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const override; - // Base pointer (stack realignment) support. - bool canRealignStack(const MachineFunction &MF) const; - bool needsStackRealignment(const MachineFunction &MF) const override; + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, + unsigned Kind = 0) const override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td index 2ba42eb94a40..80a83fa76b57 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -33,22 +33,26 @@ def FP64 : WebAssemblyReg<"%FP64">; def SP32 : WebAssemblyReg<"%SP32">; def SP64 : WebAssemblyReg<"%SP64">; -// TODO(jfb) The following comes from NVPTX. Is it really needed, or can we do -// away with it? Try deleting once the backend works. -// WebAssembly uses virtual registers, but the backend defines a few physical -// registers here to keep SDAG and the MachineInstr layers happy. -foreach i = 0-4 in { - def I#i : WebAssemblyReg<"%i."#i>; // i32 - def L#i : WebAssemblyReg<"%l."#i>; // i64 - def F#i : WebAssemblyReg<"%f."#i>; // f32 - def D#i : WebAssemblyReg<"%d."#i>; // f64 -} +// The register allocation framework requires register classes have at least +// one register, so we define a few for the floating point register classes +// since we otherwise don't need a physical register in those classes. +def F32_0 : WebAssemblyReg<"%f32.0">; +def F64_0 : WebAssemblyReg<"%f64.0">; + +// The expression stack "register". This is an opaque entity which serves to +// order uses and defs that must remain in LIFO order. +def EXPR_STACK : WebAssemblyReg<"STACK">; + +// The incoming arguments "register". This is an opaque entity which serves to +// order the ARGUMENT instructions that are emulating live-in registers and +// must not be scheduled below other instructions. 
+def ARGUMENTS : WebAssemblyReg<"ARGUMENTS">; //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// -def Int32 : WebAssemblyRegClass<[i32], 32, (add (sequence "I%u", 0, 4), SP32)>; -def Int64 : WebAssemblyRegClass<[i64], 64, (add (sequence "L%u", 0, 4), SP64)>; -def Float32 : WebAssemblyRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>; -def Float64 : WebAssemblyRegClass<[f64], 64, (add (sequence "D%u", 0, 4))>; +def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32)>; +def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64)>; +def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>; +def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>; diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp new file mode 100644 index 000000000000..4e08b2b079eb --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -0,0 +1,124 @@ +//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements an optimization pass using store result values. +/// +/// WebAssembly's store instructions return the stored value. This is to enable +/// an optimization wherein uses of the stored value can be replaced by uses of +/// the store's result value, making the stored value register more likely to +/// be single-use, thus more likely to be useful to register stackifying, and +/// potentially also exposing the store to register stackifying. These both can +/// reduce get_local/set_local traffic. 
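+///
+/// For example, roughly:
+///
+///   %t = STORE_I32 ..., %v
+///   ... use of %v ...
+///
+/// becomes, when the store dominates the use:
+///
+///   %t = STORE_I32 ..., %v
+///   ... use of %t ...
+///
+/// leaving %v with fewer uses and so more likely to be single-use.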
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-store-results" + +namespace { +class WebAssemblyStoreResults final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyStoreResults() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { + return "WebAssembly Store Results"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +private: +}; +} // end anonymous namespace + +char WebAssemblyStoreResults::ID = 0; +FunctionPass *llvm::createWebAssemblyStoreResults() { + return new WebAssemblyStoreResults(); +} + +bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Store Results **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + bool Changed = false; + + assert(MRI.isSSA() && "StoreResults depends on SSA form"); + + for (auto &MBB : MF) { + DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n'); + for (auto &MI : MBB) + switch (MI.getOpcode()) { + default: + break; + case WebAssembly::STORE8_I32: + case WebAssembly::STORE16_I32: + case WebAssembly::STORE8_I64: + case WebAssembly::STORE16_I64: + case WebAssembly::STORE32_I64: + case WebAssembly::STORE_F32: + case WebAssembly::STORE_F64: + case WebAssembly::STORE_I32: + case WebAssembly::STORE_I64: + unsigned ToReg = MI.getOperand(0).getReg(); + unsigned FromReg = MI.getOperand(3).getReg(); + for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { + MachineOperand &O = *I++; + MachineInstr *Where = O.getParent(); + if (Where->getOpcode() == TargetOpcode::PHI) { + // PHIs use their operands on their incoming CFG edges rather than + // in their parent blocks. Get the basic block paired with this use + // of FromReg and check that MI's block dominates it. + MachineBasicBlock *Pred = + Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB(); + if (!MDT.dominates(&MBB, Pred)) + continue; + } else { + // For a non-PHI, check that MI dominates the instruction in the + // normal way. + if (&MI == Where || !MDT.dominates(&MI, Where)) + continue; + } + Changed = true; + DEBUG(dbgs() << "Setting operand " << O << " in " << *Where + << " from " << MI << "\n"); + O.setReg(ToReg); + // If the store's def was previously dead, it is no longer. But the + // dead flag shouldn't be set yet. 
+ assert(!MI.getOperand(0).isDead() && "Dead flag set on store result"); + } + } + } + + return Changed; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index 3d9e7aacbfbf..cb2d5a63a19f 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -46,3 +46,4 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, TLInfo(TM, *this) {} bool WebAssemblySubtarget::enableMachineScheduler() const { return true; } +bool WebAssemblySubtarget::useAA() const { return true; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h index 6f1761940930..f530a290fa0e 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -61,9 +61,15 @@ public: const WebAssemblyTargetLowering *getTargetLowering() const override { return &TLInfo; } + const WebAssemblyInstrInfo *getInstrInfo() const override { + return &InstrInfo; + } + const WebAssemblyRegisterInfo *getRegisterInfo() const override { + return &getInstrInfo()->getRegisterInfo(); + } const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override; - bool useAA() const override { return true; } + bool useAA() const override; // Predicates used by WebAssemblyInstrInfo.td. bool hasAddr64() const { return TargetTriple.isArch64Bit(); } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 6f93248bd13c..e31ea46de9f5 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -45,11 +45,16 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT.isArch64Bit() - ? "e-p:64:64-i64:64-v128:8:128-n32:64-S128" - : "e-p:32:32-i64:64-v128:8:128-n32:64-S128", + : LLVMTargetMachine(T, TT.isArch64Bit() ? "e-p:64:64-i64:64-n32:64-S128" + : "e-p:32:32-i64:64-n32:64-S128", TT, CPU, FS, Options, RM, CM, OL), TLOF(make_unique<WebAssemblyTargetObjectFile>()) { + // WebAssembly type-checks expressions, but a noreturn function with a return + // type that doesn't match the context will cause a check failure. So we lower + // LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's + // 'unreachable' expression which is meant for that case. + this->Options.TrapUnreachable = true; + initAsmInfo(); // We need a reducible CFG, so disable some optimizations which tend to @@ -77,7 +82,7 @@ WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
resetTargetOptions(F); - I = make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this); + I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this); } return I.get(); } @@ -94,23 +99,18 @@ public: } FunctionPass *createTargetRegisterAllocator(bool) override; - void addFastRegAlloc(FunctionPass *RegAllocPass) override; - void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; void addIRPasses() override; - bool addPreISel() override; bool addInstSelector() override; bool addILPOpts() override; void addPreRegAlloc() override; - void addRegAllocPasses(bool Optimized); void addPostRegAlloc() override; - void addPreSched2() override; void addPreEmitPass() override; }; } // end anonymous namespace TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(WebAssemblyTTIImpl(this, F)); }); } @@ -124,50 +124,86 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) { return nullptr; // No reg alloc } -void WebAssemblyPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { - assert(!RegAllocPass && "WebAssembly uses no regalloc!"); - addRegAllocPasses(false); -} - -void WebAssemblyPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { - assert(!RegAllocPass && "WebAssembly uses no regalloc!"); - addRegAllocPasses(true); -} - //===----------------------------------------------------------------------===// // The following functions are called from lib/CodeGen/Passes.cpp to modify // the CodeGen pass sequence. //===----------------------------------------------------------------------===// void WebAssemblyPassConfig::addIRPasses() { - // FIXME: the default for this option is currently POSIX, whereas - // WebAssembly's MVP should default to Single. if (TM->Options.ThreadModel == ThreadModel::Single) + // In "single" mode, atomics get lowered to non-atomics. addPass(createLowerAtomicPass()); else // Expand some atomic operations. WebAssemblyTargetLowering has hooks which // control specifically what gets lowered. addPass(createAtomicExpandPass(TM)); + // Optimize "returned" function attributes. + addPass(createWebAssemblyOptimizeReturned()); + TargetPassConfig::addIRPasses(); } -bool WebAssemblyPassConfig::addPreISel() { return false; } - bool WebAssemblyPassConfig::addInstSelector() { + (void)TargetPassConfig::addInstSelector(); addPass( createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel())); + // Run the argument-move pass immediately after the ScheduleDAG scheduler + // so that we can fix up the ARGUMENT instructions before anything else + // sees them in the wrong place. + addPass(createWebAssemblyArgumentMove()); return false; } -bool WebAssemblyPassConfig::addILPOpts() { return true; } +bool WebAssemblyPassConfig::addILPOpts() { + (void)TargetPassConfig::addILPOpts(); + return true; +} + +void WebAssemblyPassConfig::addPreRegAlloc() { + TargetPassConfig::addPreRegAlloc(); -void WebAssemblyPassConfig::addPreRegAlloc() {} + // Prepare store instructions for register stackifying. + addPass(createWebAssemblyStoreResults()); +} -void WebAssemblyPassConfig::addRegAllocPasses(bool Optimized) {} +void WebAssemblyPassConfig::addPostRegAlloc() { + // TODO: The following CodeGen passes don't currently support code containing + // virtual registers. Consider removing their restrictions and re-enabling + // them. 
+ // + // We use our own PrologEpilogInserter which is very slightly modified to + // tolerate virtual registers. + disablePass(&PrologEpilogCodeInserterID); + // Fails with: should be run after register allocation. + disablePass(&MachineCopyPropagationID); + + // Mark registers as representing wasm's expression stack. + addPass(createWebAssemblyRegStackify()); + + // Run the register coloring pass to reduce the total number of registers. + addPass(createWebAssemblyRegColoring()); + + TargetPassConfig::addPostRegAlloc(); + + // Run WebAssembly's version of the PrologEpilogInserter. Target-independent + // PEI runs after PostRegAlloc and after ShrinkWrap. Putting it here will run + // PEI before ShrinkWrap but otherwise in the same position in the order. + addPass(createWebAssemblyPEI()); +} -void WebAssemblyPassConfig::addPostRegAlloc() {} +void WebAssemblyPassConfig::addPreEmitPass() { + TargetPassConfig::addPreEmitPass(); -void WebAssemblyPassConfig::addPreSched2() {} + // Put the CFG in structured form; insert BLOCK and LOOP markers. + addPass(createWebAssemblyCFGStackify()); -void WebAssemblyPassConfig::addPreEmitPass() {} + // Lower br_unless into br_if. + addPass(createWebAssemblyLowerBrUnless()); + + // Create a mapping from LLVM CodeGen virtual registers to wasm registers. + addPass(createWebAssemblyRegNumbering()); + + // Perform the very last peephole optimizations on the code. + addPass(createWebAssemblyPeephole()); +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp new file mode 100644 index 000000000000..74e33b93e00d --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp @@ -0,0 +1,24 @@ +//===-- WebAssemblyTargetObjectFile.cpp - WebAssembly Object Info ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file defines the functions of the WebAssembly-specific subclass +/// of TargetLoweringObjectFile. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyTargetObjectFile.h" +#include "WebAssemblyTargetMachine.h" +using namespace llvm; + +void WebAssemblyTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h index ee78b945ada2..39e50c9c575d 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h @@ -16,50 +16,13 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" namespace llvm { -class GlobalVariable; - -class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFile { +class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFileELF { public: - WebAssemblyTargetObjectFile() { - TextSection = nullptr; - DataSection = nullptr; - BSSSection = nullptr; - ReadOnlySection = nullptr; - - StaticCtorSection = nullptr; - StaticDtorSection = nullptr; - LSDASection = nullptr; - EHFrameSection = nullptr; - DwarfAbbrevSection = nullptr; - DwarfInfoSection = nullptr; - DwarfLineSection = nullptr; - DwarfFrameSection = nullptr; - DwarfPubTypesSection = nullptr; - DwarfDebugInlineSection = nullptr; - DwarfStrSection = nullptr; - DwarfLocSection = nullptr; - DwarfARangesSection = nullptr; - DwarfRangesSection = nullptr; - } - - MCSection *getSectionForConstant(SectionKind Kind, - const Constant *C) const override { - return ReadOnlySection; - } - - MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler &Mang, - const TargetMachine &TM) const override { - return DataSection; - } - - MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler &Mang, - const TargetMachine &TM) const override; + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index fa88ed526df2..356631711921 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -21,8 +21,7 @@ using namespace llvm; #define DEBUG_TYPE "wasmtti" TargetTransformInfo::PopcntSupportKind -WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) { +WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - // TODO: Make Math.popcount32 happen in WebAssembly. 
- return TTI::PSK_Software; + return TargetTransformInfo::PSK_FastHardware; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 7ffb6047b963..26dc388cc922 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -38,7 +38,7 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> { const WebAssemblyTargetLowering *getTLI() const { return TLI; } public: - WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, Function &F) + WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} @@ -54,7 +54,7 @@ public: // TODO: Implement more Scalar TTI for WebAssembly - TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; /// @} diff --git a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt new file mode 100644 index 000000000000..ee9d060f339e --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -0,0 +1,311 @@ +# Tests which are known to fail from the GCC torture test suite. + +# Core dump. +920908-1.c +pr38151.c +va-arg-22.c + +# TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed. +struct-ret-1.c +va-arg-11.c +va-arg-21.c +va-arg-24.c +va-arg-trap-1.c + +# WebAssemblyCFGStackify.cpp:211: void SortBlocks(llvm::MachineFunction&, const llvm::MachineLoopInfo&): Assertion `L->contains( MLI.getLoopFor(&*prev(MachineFunction::iterator(&MBB)))) && "Loop isn't contiguous"' failed. +20000815-1.c +20010129-1.c +930628-1.c +980707-1.c + +# WebAssemblyISelLowering.cpp:316: virtual llvm::SDValue llvm::WebAssemblyTargetLowering::LowerCall(llvm::TargetLowering::CallLoweringInfo&, llvm::SmallVectorImpl<llvm::SDValue>&) const: Assertion `!Out.Flags.isByVal() && "byval is not valid for return values"' failed. +20030914-2.c +20040703-1.c +20081117-1.c +920625-1.c +931004-11.c +931004-13.c +980223.c +bitfld-5.c +complex-7.c +pr38969.c +pr51323.c +pr52129.c +pr57130.c + +# These were previously "Cannot select FrameIndex." Now most of them fail +# because they contain call frame pseudos (e.g. call a vararg func), +# frame pointers, or similar. This list will be updated again soon. 
+20000519-1.c
+20000706-4.c
+20000706-5.c
+20000801-2.c
+20000801-4.c
+20011126-2.c
+
+20020529-1.c
+20021024-1.c
+
+20030828-1.c
+20030914-1.c
+
+20040302-1.c
+20040625-1.c
+20040823-1.c
+
+20041113-1.c
+
+20041214-1.c
+
+20050826-2.c
+
+20071213-1.c
+
+20080506-2.c
+20080519-1.c
+
+20081103-1.c
+20090113-1.c
+20090113-2.c
+20090113-3.c
+
+20090623-1.c
+
+920501-6.c
+920501-8.c
+920726-1.c
+930518-1.c
+
+931004-10.c
+931004-12.c
+931004-14.c
+931004-2.c
+931004-4.c
+931004-6.c
+931004-8.c
+
+980205.c
+980608-1.c
+980709-1.c
+980716-1.c
+990127-1.c
+
+991216-2.c
+
+#cbrt.c
+complex-5.c
+complex-6.c
+
+enum-3.c
+fprintf-chk-1.c
+frame-address.c
+loop-15.c
+loop-ivopts-2.c
+mayalias-3.c
+
+multi-ix.c
+
+pr20466-1.c
+
+
+pr28778.c
+pr28982b.c
+
+pr30778.c
+pr31448-2.c
+pr31448.c
+
+pr33870-1.c
+pr33870.c
+
+pr38051.c
+
+pr39100.c
+
+pr39339.c
+pr40022.c
+pr40657.c
+
+pr43987.c
+
+pr44575.c
+
+pr44942.c
+pr46309.c
+pr47538.c
+pr47925.c
+
+pr49390.c
+pr49419.c
+
+#pr51877.c
+
+#pr52979-1.c
+#pr52979-2.c
+pr53645-2.c
+pr53645.c
+
+pr56205.c
+
+pr56866.c
+
+pr57876.c
+pr58277-1.c
+
+pr59643.c
+
+printf-chk-1.c
+pta-field-1.c
+pta-field-2.c
+
+stdarg-1.c
+stdarg-2.c
+stdarg-3.c
+stdarg-4.c
+strct-stdarg-1.c
+strct-varg-1.c
+
+va-arg-1.c
+va-arg-10.c
+va-arg-12.c
+va-arg-13.c
+va-arg-14.c
+va-arg-15.c
+va-arg-16.c
+va-arg-17.c
+va-arg-18.c
+va-arg-19.c
+va-arg-2.c
+va-arg-20.c
+va-arg-23.c
+va-arg-26.c
+va-arg-4.c
+va-arg-5.c
+va-arg-6.c
+va-arg-7.c
+va-arg-8.c
+va-arg-9.c
+va-arg-pack-1.c
+vfprintf-1.c
+vfprintf-chk-1.c
+vprintf-1.c
+vprintf-chk-1.c
+
+# Cannot select callseq_end.
+20040811-1.c
+pr43220.c
+vla-dealloc-1.c
+
+# Cannot select brind.
+20071210-1.c
+920501-4.c
+920501-5.c
+
+# Cannot select BlockAddress.
+comp-goto-1.c
+980526-1.c
+990208-1.c
+
+# WebAssembly hasn't implemented byval arguments.
+20000412-3.c
+20000419-1.c
+20000706-1.c
+20000706-2.c
+20000707-1.c
+20000717-1.c
+20000717-5.c
+20000808-1.c
+20010605-2.c
+20011113-1.c
+20020215-1.c
+20020810-1.c
+20021118-1.c
+20040707-1.c
+20040709-1.c
+20040709-2.c
+20041201-1.c
+20050713-1.c
+20070614-1.c
+920908-2.c
+921112-1.c
+921117-1.c
+921123-2.c
+921204-1.c
+930126-1.c
+930208-1.c
+931004-5.c
+931004-9.c
+931031-1.c
+950607-2.c
+960416-1.c
+990525-1.c
+991118-1.c
+bf64-1.c
+complex-1.c
+complex-2.c
+pr15262-2.c
+pr20621-1.c
+pr23135.c
+pr30185.c
+pr42248.c
+
+# unimplemented operation lowering.
+20010122-1.c
+20030323-1.c
+20030811-1.c
+pr17377.c
+
+# Error: invalid output constraint '=t' in asm.
+990413-2.c
+990826-0.c
+
+# Error: __builtin_setjmp / __builtin_longjmp is not supported for the current target.
+built-in-setjmp.c
+pr60003.c
+
+# Error in the program / unsupported by Clang.
+scal-to-vec1.c
+scal-to-vec2.c
+scal-to-vec3.c
+20000822-1.c
+20010209-1.c
+20010605-1.c
+20030501-1.c
+20040520-1.c
+20061220-1.c
+20090219-1.c
+920415-1.c
+920428-2.c
+920501-7.c
+920612-2.c
+920721-4.c
+921017-1.c
+921215-1.c
+931002-1.c
+comp-goto-2.c
+nest-align-1.c
+nest-stdar-1.c
+nestfunc-1.c
+nestfunc-2.c
+nestfunc-3.c
+nestfunc-5.c
+nestfunc-6.c
+nestfunc-7.c
+pr22061-3.c
+pr22061-4.c
+pr24135.c
+pr51447.c
+20020412-1.c
+20040308-1.c
+20040423-1.c
+20041218-2.c
+20070919-1.c
+align-nest.c
+pr41935.c
+20050107-1.c
+20050119-1.c
+20050119-2.c
+920302-1.c
+920501-3.c
+920728-1.c
+pr28865.c