author     Dimitry Andric <dim@FreeBSD.org>         2015-12-30 13:13:10 +0000
committer  Dimitry Andric <dim@FreeBSD.org>         2015-12-30 13:13:10 +0000
commit     7d523365ff1a3cc95bc058b33102500f61e8166d (patch)
tree       b466a4817f79516eb1df8eae92bccf62ecc84003 /contrib/llvm/lib/Target/Hexagon
parent     e3b65fde506060bec5cd110fcf03b440bd0eea1d (diff)
parent     dd58ef019b700900793a1eb48b52123db01b654e (diff)
Update llvm to trunk r256633.
Notes:
svn path=/projects/clang380-import/; revision=292941
Diffstat (limited to 'contrib/llvm/lib/Target/Hexagon')
84 files changed, 22946 insertions, 3783 deletions
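
The centerpiece of this subtree is a new MC-layer assembler parser for Hexagon. It reads packetized assembly: instructions grouped into a packet with braces, optionally followed by bundle options such as :endloop0, which matchBundleOptions() below handles. A minimal sketch of accepted input (hypothetical example, not taken from the commit):

    {
      r0 = #1
      r1 = memw(r2+#8)
    }:endloop0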
diff --git a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
new file mode 100644
index 000000000000..a8622a96527c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -0,0 +1,2152 @@
//===-- HexagonAsmParser.cpp - Parse Hexagon asm to MCInst instructions----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "mcasmparser"

#include "Hexagon.h"
#include "HexagonRegisterInfo.h"
#include "HexagonTargetStreamer.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCELFStreamer.h"
#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCExpr.h"
#include "MCTargetDesc/HexagonMCShuffler.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "MCTargetDesc/HexagonMCAsmInfo.h"
#include "MCTargetDesc/HexagonShuffler.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>

using namespace llvm;

static cl::opt<bool> EnableFutureRegs("mfuture-regs",
                                      cl::desc("Enable future registers"));

static cl::opt<bool> WarnMissingParenthesis("mwarn-missing-parenthesis",
    cl::desc("Warn for missing parenthesis around predicate registers"),
    cl::init(true));
static cl::opt<bool> ErrorMissingParenthesis("merror-missing-parenthesis",
    cl::desc("Error for missing parenthesis around predicate registers"),
    cl::init(false));
static cl::opt<bool> WarnSignedMismatch("mwarn-sign-mismatch",
    cl::desc("Warn for mismatching a signed and unsigned value"),
    cl::init(true));
static cl::opt<bool> WarnNoncontigiousRegister("mwarn-noncontigious-register",
    cl::desc("Warn for register names that arent contigious"),
    cl::init(true));
static cl::opt<bool> ErrorNoncontigiousRegister("merror-noncontigious-register",
    cl::desc("Error for register names that aren't contigious"),
    cl::init(false));

namespace {
struct HexagonOperand;

class HexagonAsmParser : public MCTargetAsmParser {

  HexagonTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *Parser.getStreamer().getTargetStreamer();
    return static_cast<HexagonTargetStreamer &>(TS);
  }

  MCAsmParser &Parser;
  MCAssembler *Assembler;
  MCInstrInfo const &MCII;
  MCInst MCB;
  bool InBrackets;

  MCAsmParser &getParser() const { return Parser; }
  MCAssembler *getAssembler() const { return Assembler; }
  MCAsmLexer &getLexer() const { return Parser.getLexer(); }

  bool equalIsAsmAssignment() override { return false; }
  bool isLabel(AsmToken &Token) override;

  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
  bool ParseDirectiveFalign(unsigned Size, SMLoc L);

  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                             SMLoc &EndLoc) override;
  bool ParseDirectiveSubsection(SMLoc L);
  bool ParseDirectiveValue(unsigned Size, SMLoc L);
  bool ParseDirectiveComm(bool IsLocal, SMLoc L);
  bool RegisterMatchesArch(unsigned MatchNum) const;

  bool matchBundleOptions();
  bool handleNoncontigiousRegister(bool Contigious, SMLoc &Loc);
  bool finishBundle(SMLoc IDLoc, MCStreamer &Out);
  void canonicalizeImmediates(MCInst &MCI);
  bool matchOneInstruction(MCInst &MCB, SMLoc IDLoc,
                           OperandVector &InstOperands, uint64_t &ErrorInfo,
                           bool MatchingInlineAsm, bool &MustExtend);

  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;

  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  void OutOfRange(SMLoc IDLoc, long long Val, long long Max);
  int processInstruction(MCInst &Inst, OperandVector const &Operands,
                         SMLoc IDLoc, bool &MustExtend);

  // Check if we have an assembler and, if so, set the ELF e_header flags.
  void chksetELFHeaderEFlags(unsigned flags) {
    if (getAssembler())
      getAssembler()->setELFHeaderEFlags(flags);
  }

/// @name Auto-generated Match Functions
/// {

#define GET_ASSEMBLER_HEADER
#include "HexagonGenAsmMatcher.inc"

  /// }

public:
  HexagonAsmParser(const MCSubtargetInfo &_STI, MCAsmParser &_Parser,
                   const MCInstrInfo &MII, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, _STI), Parser(_Parser), MCII(MII),
        MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) {
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));

    MCAsmParserExtension::Initialize(_Parser);

    Assembler = nullptr;
    // FIXME: need better way to detect AsmStreamer (upstream removed getKind())
    if (!Parser.getStreamer().hasRawTextSupport()) {
      MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer());
      Assembler = &MES->getAssembler();
    }
  }

  bool mustExtend(OperandVector &Operands);
  bool splitIdentifier(OperandVector &Operands);
  bool parseOperand(OperandVector &Operands);
  bool parseInstruction(OperandVector &Operands);
  bool implicitExpressionLocation(OperandVector &Operands);
  bool parseExpressionOrOperand(OperandVector &Operands);
  bool parseExpression(MCExpr const *&Expr);
  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                SMLoc NameLoc, OperandVector &Operands) override {
    llvm_unreachable("Unimplemented");
  }
  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                AsmToken ID, OperandVector &Operands) override;

  virtual bool ParseDirective(AsmToken DirectiveID) override;
};

/// HexagonOperand - Instances of this class represent a parsed Hexagon machine
/// instruction.
struct HexagonOperand : public MCParsedAsmOperand {
  enum KindTy { Token, Immediate, Register } Kind;

  SMLoc StartLoc, EndLoc;

  struct TokTy {
    const char *Data;
    unsigned Length;
  };

  struct RegTy {
    unsigned RegNum;
  };

  struct ImmTy {
    const MCExpr *Val;
    bool MustExtend;
  };

  struct InstTy {
    OperandVector *SubInsts;
  };

  union {
    struct TokTy Tok;
    struct RegTy Reg;
    struct ImmTy Imm;
  };

  HexagonOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}

public:
  HexagonOperand(const HexagonOperand &o) : MCParsedAsmOperand() {
    Kind = o.Kind;
    StartLoc = o.StartLoc;
    EndLoc = o.EndLoc;
    switch (Kind) {
    case Register:
      Reg = o.Reg;
      break;
    case Immediate:
      Imm = o.Imm;
      break;
    case Token:
      Tok = o.Tok;
      break;
    }
  }

  /// getStartLoc - Get the location of the first token of this operand.
  SMLoc getStartLoc() const { return StartLoc; }

  /// getEndLoc - Get the location of the last token of this operand.
  SMLoc getEndLoc() const { return EndLoc; }

  unsigned getReg() const {
    assert(Kind == Register && "Invalid access!");
    return Reg.RegNum;
  }

  const MCExpr *getImm() const {
    assert(Kind == Immediate && "Invalid access!");
    return Imm.Val;
  }

  bool isToken() const { return Kind == Token; }
  bool isImm() const { return Kind == Immediate; }
  bool isMem() const { llvm_unreachable("No isMem"); }
  bool isReg() const { return Kind == Register; }

  bool CheckImmRange(int immBits, int zeroBits, bool isSigned,
                     bool isRelocatable, bool Extendable) const {
    if (Kind == Immediate) {
      const MCExpr *myMCExpr = getImm();
      if (Imm.MustExtend && !Extendable)
        return false;
      int64_t Res;
      if (myMCExpr->evaluateAsAbsolute(Res)) {
        int bits = immBits + zeroBits;
        // The field occupies bits total; the low zeroBits of the value
        // must be zero.
        if (Res & ((1 << zeroBits) - 1))
          return false;
        if (isSigned) {
          if (Res < (1LL << (bits - 1)) && Res >= -(1LL << (bits - 1)))
            return true;
        } else {
          if (bits == 64)
            return true;
          if (Res >= 0)
            return ((uint64_t)Res < (uint64_t)(1ULL << bits)) ? true : false;
          else {
            const int64_t high_bit_set = 1ULL << 63;
            const uint64_t mask = (high_bit_set >> (63 - bits));
            return (((uint64_t)Res & mask) == mask) ? true : false;
          }
        }
      } else if (myMCExpr->getKind() == MCExpr::SymbolRef && isRelocatable)
        return true;
      else if (myMCExpr->getKind() == MCExpr::Binary ||
               myMCExpr->getKind() == MCExpr::Unary)
        return true;
    }
    return false;
  }

  bool isf32Ext() const { return false; }
  bool iss32Imm() const { return CheckImmRange(32, 0, true, true, false); }
  bool iss8Imm() const { return CheckImmRange(8, 0, true, false, false); }
  bool iss8Imm64() const { return CheckImmRange(8, 0, true, true, false); }
  bool iss7Imm() const { return CheckImmRange(7, 0, true, false, false); }
  bool iss6Imm() const { return CheckImmRange(6, 0, true, false, false); }
  bool iss4Imm() const { return CheckImmRange(4, 0, true, false, false); }
  bool iss4_0Imm() const { return CheckImmRange(4, 0, true, false, false); }
  bool iss4_1Imm() const { return CheckImmRange(4, 1, true, false, false); }
  bool iss4_2Imm() const { return CheckImmRange(4, 2, true, false, false); }
  bool iss4_3Imm() const { return CheckImmRange(4, 3, true, false, false); }
  bool iss4_6Imm() const { return CheckImmRange(4, 0, true, false, false); }
  bool iss3_6Imm() const { return CheckImmRange(3, 0, true, false, false); }
  bool iss3Imm() const { return CheckImmRange(3, 0, true, false, false); }

  bool isu64Imm() const { return CheckImmRange(64, 0, false, true, true); }
  bool isu32Imm() const { return CheckImmRange(32, 0, false, true, false); }
  bool isu26_6Imm() const { return CheckImmRange(26, 6, false, true, false); }
  bool isu16Imm() const { return CheckImmRange(16, 0, false, true, false); }
  bool isu16_0Imm() const { return CheckImmRange(16, 0, false, true, false); }
  bool isu16_1Imm() const { return CheckImmRange(16, 1, false, true, false); }
  bool isu16_2Imm() const { return CheckImmRange(16, 2, false, true, false); }
  bool isu16_3Imm() const { return CheckImmRange(16, 3, false, true, false); }
  bool isu11_3Imm() const { return CheckImmRange(11, 3, false, false, false); }
  bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); }
  bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); }
  bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); }
  bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); }
  bool isu10Imm() const { return CheckImmRange(10, 0, false, false, false); }
  bool isu9Imm() const { return CheckImmRange(9, 0, false, false, false); }
  bool isu8Imm() const { return CheckImmRange(8, 0, false, false, false); }
  bool isu7Imm() const { return CheckImmRange(7, 0, false, false, false); }
  bool isu6Imm() const { return CheckImmRange(6, 0, false, false, false); }
  bool isu5Imm() const { return CheckImmRange(5, 0, false, false, false); }
  bool isu4Imm() const { return CheckImmRange(4, 0, false, false, false); }
  bool isu3Imm() const { return CheckImmRange(3, 0, false, false, false); }
  bool isu2Imm() const { return CheckImmRange(2, 0, false, false, false); }
  bool isu1Imm() const { return CheckImmRange(1, 0, false, false, false); }

  bool ism6Imm() const { return CheckImmRange(6, 0, false, false, false); }
  bool isn8Imm() const { return CheckImmRange(8, 0, false, false, false); }

  bool iss16Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); }
  bool iss12Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); }
  bool iss10Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); }
  bool iss9Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); }
  bool iss8Ext() const { return CheckImmRange(8 + 26, 0, true, true, true); }
  bool iss7Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); }
  bool iss6Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); }
  bool iss11_0Ext() const {
    return CheckImmRange(11 + 26, 0, true, true, true);
  }
  bool iss11_1Ext() const {
    return CheckImmRange(11 + 26, 1, true, true, true);
  }
  bool iss11_2Ext() const {
    return CheckImmRange(11 + 26, 2, true, true, true);
  }
  bool iss11_3Ext() const {
    return CheckImmRange(11 + 26, 3, true, true, true);
  }

  bool isu6Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); }
  bool isu7Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); }
  bool isu8Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); }
  bool isu9Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); }
  bool isu10Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); }
  bool isu6_0Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); }
  bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); }
  bool isu6_2Ext() const { return CheckImmRange(6 + 26, 2, false, true, true); }
  bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); }
  bool isu32MustExt() const { return isImm() && Imm.MustExtend; }

  void addRegOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::createReg(getReg()));
  }

  void addImmOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::createExpr(getImm()));
  }

  void addSignedImmOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    MCExpr const *Expr = getImm();
    int64_t Value;
    if (!Expr->evaluateAsAbsolute(Value)) {
      Inst.addOperand(MCOperand::createExpr(Expr));
      return;
    }
    int64_t Extended = SignExtend64(Value, 32);
    if ((Extended < 0) == (Value < 0)) {
      Inst.addOperand(MCOperand::createExpr(Expr));
      return;
    }
    // Flip bit 33 to signal a signed/unsigned mismatch.
    Extended ^= 0x100000000;
    Inst.addOperand(MCOperand::createImm(Extended));
  }

  void addf32ExtOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }

  void adds32ImmOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds8ImmOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds8Imm64Operands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds6ImmOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds4ImmOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds4_0ImmOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds4_1ImmOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds4_2ImmOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds4_3ImmOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds3ImmOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }

  void addu64ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu32ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu26_6ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu16ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu16_0ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu16_1ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu16_2ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu16_3ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu11_3ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu10ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu9ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu8ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu7ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu6ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu6_0ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu6_1ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu6_2ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu6_3ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu5ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu4ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu3ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu2ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu1ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }

  void addm6ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addn8ImmOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }

  void adds16ExtOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds12ExtOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds10ExtOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds9ExtOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds8ExtOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds6ExtOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds11_0ExtOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds11_1ExtOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds11_2ExtOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }
  void adds11_3ExtOperands(MCInst &Inst, unsigned N) const {
    addSignedImmOperands(Inst, N);
  }

  void addu6ExtOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu7ExtOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu8ExtOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu9ExtOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu10ExtOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu6_0ExtOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu6_1ExtOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu6_2ExtOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu6_3ExtOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }
  void addu32MustExtOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
  }

  void adds4_6ImmOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
    Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
  }

  void adds3_6ImmOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
    Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
  }

  StringRef getToken() const {
    assert(Kind == Token && "Invalid access!");
    return StringRef(Tok.Data, Tok.Length);
  }

  virtual void print(raw_ostream &OS) const;

  static std::unique_ptr<HexagonOperand> CreateToken(StringRef Str, SMLoc S) {
    HexagonOperand *Op = new HexagonOperand(Token);
    Op->Tok.Data = Str.data();
    Op->Tok.Length = Str.size();
    Op->StartLoc = S;
    Op->EndLoc = S;
    return std::unique_ptr<HexagonOperand>(Op);
  }

  static std::unique_ptr<HexagonOperand> CreateReg(unsigned RegNum, SMLoc S,
                                                   SMLoc E) {
    HexagonOperand *Op = new HexagonOperand(Register);
    Op->Reg.RegNum = RegNum;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return std::unique_ptr<HexagonOperand>(Op);
  }

  static std::unique_ptr<HexagonOperand> CreateImm(const MCExpr *Val, SMLoc S,
                                                   SMLoc E) {
    HexagonOperand *Op = new HexagonOperand(Immediate);
    Op->Imm.Val = Val;
    Op->Imm.MustExtend = false;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return std::unique_ptr<HexagonOperand>(Op);
  }
};

} // end anonymous namespace.

void HexagonOperand::print(raw_ostream &OS) const {
  switch (Kind) {
  case Immediate:
    getImm()->print(OS, nullptr);
    break;
  case Register:
    OS << "<register R";
    OS << getReg() << ">";
    break;
  case Token:
    OS << "'" << getToken() << "'";
    break;
  }
}

/// @name Auto-generated Match Functions
static unsigned MatchRegisterName(StringRef Name);

bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {
  DEBUG(dbgs() << "Bundle:");
  DEBUG(MCB.dump_pretty(dbgs()));
  DEBUG(dbgs() << "--\n");

  // Check the bundle for errors.
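  // For instance (an illustrative input, not part of this commit), a packet
  // that writes the same register twice, such as
  //   { r0 = #1
  //     r0 = #2 }
  // is diagnosed below with "register `R0' modified more than once".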
  const MCRegisterInfo *RI = getContext().getRegisterInfo();
  HexagonMCChecker Check(MCII, getSTI(), MCB, MCB, *RI);

  bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MCII, getSTI(),
                                                        getContext(), MCB,
                                                        &Check);

  while (Check.getNextErrInfo() == true) {
    unsigned Reg = Check.getErrRegister();
    Twine R(RI->getName(Reg));

    uint64_t Err = Check.getError();
    if (Err != HexagonMCErrInfo::CHECK_SUCCESS) {
      if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err)
        Error(IDLoc,
              "unconditional branch cannot precede another branch in packet");

      if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err ||
          HexagonMCErrInfo::CHECK_ERROR_NEWV & Err)
        Error(IDLoc, "register `" + R +
                         "' used with `.new' "
                         "but not validly modified in the same packet");

      if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err)
        Error(IDLoc, "register `" + R + "' modified more than once");

      if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err)
        Error(IDLoc, "cannot write to read-only register `" + R + "'");

      if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err)
        Error(IDLoc, "loop-setup and some branch instructions "
                     "cannot be in the same packet");

      if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) {
        Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1');
        Error(IDLoc, "packet marked with `:endloop" + N + "' " +
                         "cannot contain instructions that modify register " +
                         "`" + R + "'");
      }

      if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err)
        Error(IDLoc,
              "instruction cannot appear in packet with other instructions");

      if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err)
        Error(IDLoc, "too many slots used in packet");

      if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) {
        uint64_t Erm = Check.getShuffleError();

        if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm)
          Error(IDLoc, "invalid instruction packet");
        else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm)
          Error(IDLoc, "invalid instruction packet: too many stores");
        else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm)
          Error(IDLoc, "invalid instruction packet: too many loads");
        else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm)
          Error(IDLoc, "too many branches in packet");
        else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm)
          Error(IDLoc, "invalid instruction packet: out of slots");
        else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm)
          Error(IDLoc, "invalid instruction packet: slot error");
        else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm)
          Error(IDLoc, "v60 packet violation");
        else if (HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm)
          Error(IDLoc, "slot 0 instruction does not allow slot 1 store");
        else
          Error(IDLoc, "unknown error in instruction packet");
      }
    }

    unsigned Warn = Check.getWarning();
    if (Warn != HexagonMCErrInfo::CHECK_SUCCESS) {
      if (HexagonMCErrInfo::CHECK_WARN_CURRENT & Warn)
        Warning(IDLoc, "register `" + R + "' used with `.cur' "
                       "but not used in the same packet");
      else if (HexagonMCErrInfo::CHECK_WARN_TEMPORARY & Warn)
        Warning(IDLoc, "register `" + R + "' used with `.tmp' "
                       "but not used in the same packet");
    }
  }

  if (CheckOk) {
    MCB.setLoc(IDLoc);
    if (HexagonMCInstrInfo::bundleSize(MCB) == 0) {
      assert(!HexagonMCInstrInfo::isInnerLoop(MCB));
      assert(!HexagonMCInstrInfo::isOuterLoop(MCB));
      // Empty packets are valid yet aren't emitted.
      return false;
    }
    Out.EmitInstruction(MCB, getSTI());
  } else {
    // If compounding and duplexing didn't reduce the size to four
    // instructions or fewer, we have a packet that is too big.
    if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) {
      Error(IDLoc, "invalid instruction packet: out of slots");
      return true; // Error
    }
  }

  return false; // No error
}

bool HexagonAsmParser::matchBundleOptions() {
  MCAsmParser &Parser = getParser();
  MCAsmLexer &Lexer = getLexer();
  while (true) {
    if (!Parser.getTok().is(AsmToken::Colon))
      return false;
    Lexer.Lex();
    StringRef Option = Parser.getTok().getString();
    if (Option.compare_lower("endloop0") == 0)
      HexagonMCInstrInfo::setInnerLoop(MCB);
    else if (Option.compare_lower("endloop1") == 0)
      HexagonMCInstrInfo::setOuterLoop(MCB);
    else if (Option.compare_lower("mem_noshuf") == 0)
      HexagonMCInstrInfo::setMemReorderDisabled(MCB);
    else if (Option.compare_lower("mem_shuf") == 0)
      HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB);
    else
      return true;
    Lexer.Lex();
  }
}

// For instruction aliases, immediates are generated rather than
// MCConstantExpr; convert them to MCConstantExpr for a uniform MCExpr
// representation. Also check for signed/unsigned mismatches and warn.
void HexagonAsmParser::canonicalizeImmediates(MCInst &MCI) {
  MCInst NewInst;
  NewInst.setOpcode(MCI.getOpcode());
  for (MCOperand &I : MCI)
    if (I.isImm()) {
      int64_t Value(I.getImm());
      if ((Value & 0x100000000) != (Value & 0x80000000)) {
        // Detect bit 33 flipped w.r.t. bit 32 and signal a warning.
        Value ^= 0x100000000;
        if (WarnSignedMismatch)
          Warning(MCI.getLoc(), "Signed/Unsigned mismatch");
      }
      NewInst.addOperand(MCOperand::createExpr(
          MCConstantExpr::create(Value, getContext())));
    } else
      NewInst.addOperand(I);
  MCI = NewInst;
}

bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc,
                                           OperandVector &InstOperands,
                                           uint64_t &ErrorInfo,
                                           bool MatchingInlineAsm,
                                           bool &MustExtend) {
  // Perform matching with the tablegen asmmatcher-generated function.
  int result =
      MatchInstructionImpl(InstOperands, MCI, ErrorInfo, MatchingInlineAsm);
  if (result == Match_Success) {
    MCI.setLoc(IDLoc);
    MustExtend = mustExtend(InstOperands);
    canonicalizeImmediates(MCI);
    result = processInstruction(MCI, InstOperands, IDLoc, MustExtend);

    DEBUG(dbgs() << "Insn:");
    DEBUG(MCI.dump_pretty(dbgs()));
    DEBUG(dbgs() << "\n\n");

    MCI.setLoc(IDLoc);
  }

  // Create an instruction operand for the bundle instruction.
  // TODO: Break this into a separate function; the code here is less readable.
  // Think about how to get an instruction error to report correctly;
  // SMLoc will return the "{".
  switch (result) {
  default:
    break;
  case Match_Success:
    return false;
  case Match_MissingFeature:
    return Error(IDLoc, "invalid instruction");
  case Match_MnemonicFail:
    return Error(IDLoc, "unrecognized instruction");
  case Match_InvalidOperand:
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0U) {
      if (ErrorInfo >= InstOperands.size())
        return Error(IDLoc, "too few operands for instruction");

      ErrorLoc = (static_cast<HexagonOperand *>(InstOperands[ErrorInfo].get()))
                     ->getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }
  llvm_unreachable("Implement any new match types added!");
}

bool HexagonAsmParser::mustExtend(OperandVector &Operands) {
  unsigned Count = 0;
  for (std::unique_ptr<MCParsedAsmOperand> &i : Operands)
    if (i->isImm())
      if (static_cast<HexagonOperand *>(i.get())->Imm.MustExtend)
        ++Count;
  // Multiple extenders should have been filtered by iss9Ext et al.
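  // An instruction carries at most one constant extender; e.g. in the
  // (illustrative) instruction `r0 = add(r1, ##0x12345678)' the `##' marks
  // the single immediate that occupies the extender slot.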
  assert(Count < 2 && "Multiple extenders");
  return Count == 1;
}

bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                               OperandVector &Operands,
                                               MCStreamer &Out,
                                               uint64_t &ErrorInfo,
                                               bool MatchingInlineAsm) {
  if (!InBrackets) {
    MCB.clear();
    MCB.addOperand(MCOperand::createImm(0));
  }
  HexagonOperand &FirstOperand = static_cast<HexagonOperand &>(*Operands[0]);
  if (FirstOperand.isToken() && FirstOperand.getToken() == "{") {
    assert(Operands.size() == 1 && "Brackets should be by themselves");
    if (InBrackets) {
      getParser().Error(IDLoc, "Already in a packet");
      return true;
    }
    InBrackets = true;
    return false;
  }
  if (FirstOperand.isToken() && FirstOperand.getToken() == "}") {
    assert(Operands.size() == 1 && "Brackets should be by themselves");
    if (!InBrackets) {
      getParser().Error(IDLoc, "Not in a packet");
      return true;
    }
    InBrackets = false;
    if (matchBundleOptions())
      return true;
    return finishBundle(IDLoc, Out);
  }
  MCInst *SubInst = new (getParser().getContext()) MCInst;
  bool MustExtend = false;
  if (matchOneInstruction(*SubInst, IDLoc, Operands, ErrorInfo,
                          MatchingInlineAsm, MustExtend))
    return true;
  HexagonMCInstrInfo::extendIfNeeded(
      getParser().getContext(), MCII, MCB, *SubInst,
      HexagonMCInstrInfo::isExtended(MCII, *SubInst) || MustExtend);
  MCB.addOperand(MCOperand::createInst(SubInst));
  if (!InBrackets)
    return finishBundle(IDLoc, Out);
  return false;
}

/// ParseDirective parses the Hexagon-specific directives.
bool HexagonAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getIdentifier();
  if ((IDVal.lower() == ".word") || (IDVal.lower() == ".4byte"))
    return ParseDirectiveValue(4, DirectiveID.getLoc());
  if (IDVal.lower() == ".short" || IDVal.lower() == ".hword" ||
      IDVal.lower() == ".half")
    return ParseDirectiveValue(2, DirectiveID.getLoc());
  if (IDVal.lower() == ".falign")
    return ParseDirectiveFalign(256, DirectiveID.getLoc());
  if ((IDVal.lower() == ".lcomm") || (IDVal.lower() == ".lcommon"))
    return ParseDirectiveComm(true, DirectiveID.getLoc());
  if ((IDVal.lower() == ".comm") || (IDVal.lower() == ".common"))
    return ParseDirectiveComm(false, DirectiveID.getLoc());
  if (IDVal.lower() == ".subsection")
    return ParseDirectiveSubsection(DirectiveID.getLoc());

  return true;
}

bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) {
  const MCExpr *Subsection = 0;
  int64_t Res;

  assert((getLexer().isNot(AsmToken::EndOfStatement)) &&
         "Invalid subsection directive");
  getParser().parseExpression(Subsection);

  if (!Subsection->evaluateAsAbsolute(Res))
    return Error(L, "Cannot evaluate subsection number");

  if (getLexer().isNot(AsmToken::EndOfStatement))
    return TokError("unexpected token in directive");

  // 0-8192 is the hard-coded range in MCObjectStreamer.cpp; this keeps the
  // negative subsections together and in the same order, but at the opposite
  // end of the section. Only legacy hexagon-gcc generated assembly code
  // used negative subsections.
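  // E.g. a (hypothetical) `.subsection -1' is remapped to 8192 - 1 = 8191,
  // the last slot of the non-negative range.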
  if ((Res < 0) && (Res > -8193))
    Subsection = MCConstantExpr::create(8192 + Res, this->getContext());

  getStreamer().SubSection(Subsection);
  return false;
}

/// ::= .falign [expression]
bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) {

  int64_t MaxBytesToFill = 15;

  // If there is an argument...
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    const MCExpr *Value;
    SMLoc ExprLoc = L;

    // Make sure we have a number (parseExpression returns false on success).
    if (getParser().parseExpression(Value) == false) {
      // Make sure this is a number that is in range.
      const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
      uint64_t IntValue = MCE->getValue();
      if (!isUIntN(Size, IntValue) && !isIntN(Size, IntValue))
        return Error(ExprLoc, "literal value out of range (256) for falign");
      MaxBytesToFill = IntValue;
      Lex();
    } else {
      return Error(ExprLoc, "not a valid expression for falign directive");
    }
  }

  getTargetStreamer().emitFAlign(16, MaxBytesToFill);
  Lex();

  return false;
}

/// ::= .word [ expression (, expression)* ]
bool HexagonAsmParser::ParseDirectiveValue(unsigned Size, SMLoc L) {
  if (getLexer().isNot(AsmToken::EndOfStatement)) {

    for (;;) {
      const MCExpr *Value;
      SMLoc ExprLoc = L;
      if (getParser().parseExpression(Value))
        return true;

      // Special case constant expressions to match code generator.
      if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
        assert(Size <= 8 && "Invalid size");
        uint64_t IntValue = MCE->getValue();
        if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
          return Error(ExprLoc, "literal value out of range for directive");
        getStreamer().EmitIntValue(IntValue, Size);
      } else
        getStreamer().EmitValue(Value, Size);

      if (getLexer().is(AsmToken::EndOfStatement))
        break;

      // FIXME: Improve diagnostic.
      if (getLexer().isNot(AsmToken::Comma))
        return TokError("unexpected token in directive");
      Lex();
    }
  }

  Lex();
  return false;
}

// This is largely a copy of AsmParser's ParseDirectiveComm extended to
// accept a 3rd argument, AccessAlignment, which indicates the smallest
// memory access made to the symbol, expressed in bytes. If no
// AccessAlignment is specified, it defaults to the Alignment value.
// Hexagon's .lcomm:
//   .lcomm Symbol, Length, Alignment, AccessAlignment
bool HexagonAsmParser::ParseDirectiveComm(bool IsLocal, SMLoc Loc) {
  // FIXME: need better way to detect AsmStreamer (upstream removed
  // getKind())
  if (getStreamer().hasRawTextSupport())
    return true; // Only object file output requires special treatment.

  StringRef Name;
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");
  // Handle the identifier as the key symbol.
  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("unexpected token in directive");
  Lex();

  int64_t Size;
  SMLoc SizeLoc = getLexer().getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;

  int64_t ByteAlignment = 1;
  SMLoc ByteAlignmentLoc;
  if (getLexer().is(AsmToken::Comma)) {
    Lex();
    ByteAlignmentLoc = getLexer().getLoc();
    if (getParser().parseAbsoluteExpression(ByteAlignment))
      return true;
    if (!isPowerOf2_64(ByteAlignment))
      return Error(ByteAlignmentLoc, "alignment must be a power of 2");
  }

  int64_t AccessAlignment = 0;
  if (getLexer().is(AsmToken::Comma)) {
    // The optional access argument specifies the size of the smallest memory
    // access to be made to the symbol, expressed in bytes.
    SMLoc AccessAlignmentLoc;
    Lex();
    AccessAlignmentLoc = getLexer().getLoc();
    if (getParser().parseAbsoluteExpression(AccessAlignment))
      return true;

    if (!isPowerOf2_64(AccessAlignment))
      return Error(AccessAlignmentLoc, "access alignment must be a power of 2");
  }

  if (getLexer().isNot(AsmToken::EndOfStatement))
    return TokError("unexpected token in '.comm' or '.lcomm' directive");

  Lex();

  // NOTE: a size of zero for .comm should create an undefined symbol,
  // but a size of zero for .lcomm creates a bss symbol of size zero.
  if (Size < 0)
    return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
                          "be less than zero");

  // NOTE: The alignment in the directive is a power of 2 value, the assembler
  // may internally end up wanting an alignment in bytes.
  // FIXME: Diagnose overflow.
  if (ByteAlignment < 0)
    return Error(ByteAlignmentLoc, "invalid '.comm' or '.lcomm' directive "
                                   "alignment, can't be less than zero");

  if (!Sym->isUndefined())
    return Error(Loc, "invalid symbol redefinition");

  HexagonMCELFStreamer &HexagonELFStreamer =
      static_cast<HexagonMCELFStreamer &>(getStreamer());
  if (IsLocal) {
    HexagonELFStreamer.HexagonMCEmitLocalCommonSymbol(Sym, Size, ByteAlignment,
                                                      AccessAlignment);
    return false;
  }

  HexagonELFStreamer.HexagonMCEmitCommonSymbol(Sym, Size, ByteAlignment,
                                               AccessAlignment);
  return false;
}

// Validate a register against the architecture.
bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const {
  return true;
}

// extern "C" void LLVMInitializeHexagonAsmLexer();

/// Force static initialization.
+extern "C" void LLVMInitializeHexagonAsmParser() { + RegisterMCAsmParser<HexagonAsmParser> X(TheHexagonTarget); +} + +#define GET_MATCHER_IMPLEMENTATION +#define GET_REGISTER_MATCHER +#include "HexagonGenAsmMatcher.inc" + +namespace { +bool previousEqual(OperandVector &Operands, size_t Index, StringRef String) { + if (Index >= Operands.size()) + return false; + MCParsedAsmOperand &Operand = *Operands[Operands.size() - Index - 1]; + if (!Operand.isToken()) + return false; + return static_cast<HexagonOperand &>(Operand).getToken().equals_lower(String); +} +bool previousIsLoop(OperandVector &Operands, size_t Index) { + return previousEqual(Operands, Index, "loop0") || + previousEqual(Operands, Index, "loop1") || + previousEqual(Operands, Index, "sp1loop0") || + previousEqual(Operands, Index, "sp2loop0") || + previousEqual(Operands, Index, "sp3loop0"); +} +} + +bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) { + AsmToken const &Token = getParser().getTok(); + StringRef String = Token.getString(); + SMLoc Loc = Token.getLoc(); + getLexer().Lex(); + do { + std::pair<StringRef, StringRef> HeadTail = String.split('.'); + if (!HeadTail.first.empty()) + Operands.push_back(HexagonOperand::CreateToken(HeadTail.first, Loc)); + if (!HeadTail.second.empty()) + Operands.push_back(HexagonOperand::CreateToken( + String.substr(HeadTail.first.size(), 1), Loc)); + String = HeadTail.second; + } while (!String.empty()); + return false; +} + +bool HexagonAsmParser::parseOperand(OperandVector &Operands) { + unsigned Register; + SMLoc Begin; + SMLoc End; + MCAsmLexer &Lexer = getLexer(); + if (!ParseRegister(Register, Begin, End)) { + if (!ErrorMissingParenthesis) + switch (Register) { + default: + break; + case Hexagon::P0: + case Hexagon::P1: + case Hexagon::P2: + case Hexagon::P3: + if (previousEqual(Operands, 0, "if")) { + if (WarnMissingParenthesis) + Warning (Begin, "Missing parenthesis around predicate register"); + static char const *LParen = "("; + static char const *RParen = ")"; + Operands.push_back(HexagonOperand::CreateToken(LParen, Begin)); + Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End)); + AsmToken MaybeDotNew = Lexer.getTok(); + if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) && + MaybeDotNew.getString().equals_lower(".new")) + splitIdentifier(Operands); + Operands.push_back(HexagonOperand::CreateToken(RParen, Begin)); + return false; + } + if (previousEqual(Operands, 0, "!") && + previousEqual(Operands, 1, "if")) { + if (WarnMissingParenthesis) + Warning (Begin, "Missing parenthesis around predicate register"); + static char const *LParen = "("; + static char const *RParen = ")"; + Operands.insert(Operands.end () - 1, + HexagonOperand::CreateToken(LParen, Begin)); + Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End)); + AsmToken MaybeDotNew = Lexer.getTok(); + if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) && + MaybeDotNew.getString().equals_lower(".new")) + splitIdentifier(Operands); + Operands.push_back(HexagonOperand::CreateToken(RParen, Begin)); + return false; + } + break; + } + Operands.push_back(HexagonOperand::CreateReg( + Register, Begin, End)); + return false; + } + return splitIdentifier(Operands); +} + +bool HexagonAsmParser::isLabel(AsmToken &Token) { + MCAsmLexer &Lexer = getLexer(); + AsmToken const &Second = Lexer.getTok(); + AsmToken Third = Lexer.peekTok(); + StringRef String = Token.getString(); + if (Token.is(AsmToken::TokenKind::LCurly) || + Token.is(AsmToken::TokenKind::RCurly)) + return false; + if 
(!Token.is(AsmToken::TokenKind::Identifier)) + return true; + if (!MatchRegisterName(String.lower())) + return true; + (void)Second; + assert(Second.is(AsmToken::Colon)); + StringRef Raw (String.data(), Third.getString().data() - String.data() + + Third.getString().size()); + std::string Collapsed = Raw; + Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace), + Collapsed.end()); + StringRef Whole = Collapsed; + std::pair<StringRef, StringRef> DotSplit = Whole.split('.'); + if (!MatchRegisterName(DotSplit.first.lower())) + return true; + return false; +} + +bool HexagonAsmParser::handleNoncontigiousRegister(bool Contigious, SMLoc &Loc) { + if (!Contigious && ErrorNoncontigiousRegister) { + Error(Loc, "Register name is not contigious"); + return true; + } + if (!Contigious && WarnNoncontigiousRegister) + Warning(Loc, "Register name is not contigious"); + return false; +} + +bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + MCAsmLexer &Lexer = getLexer(); + StartLoc = getLexer().getLoc(); + SmallVector<AsmToken, 5> Lookahead; + StringRef RawString(Lexer.getTok().getString().data(), 0); + bool Again = Lexer.is(AsmToken::Identifier); + bool NeededWorkaround = false; + while (Again) { + AsmToken const &Token = Lexer.getTok(); + RawString = StringRef(RawString.data(), + Token.getString().data() - RawString.data () + + Token.getString().size()); + Lookahead.push_back(Token); + Lexer.Lex(); + bool Contigious = Lexer.getTok().getString().data() == + Lookahead.back().getString().data() + + Lookahead.back().getString().size(); + bool Type = Lexer.is(AsmToken::Identifier) || Lexer.is(AsmToken::Dot) || + Lexer.is(AsmToken::Integer) || Lexer.is(AsmToken::Real) || + Lexer.is(AsmToken::Colon); + bool Workaround = Lexer.is(AsmToken::Colon) || + Lookahead.back().is(AsmToken::Colon); + Again = (Contigious && Type) || (Workaround && Type); + NeededWorkaround = NeededWorkaround || (Again && !(Contigious && Type)); + } + std::string Collapsed = RawString; + Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace), + Collapsed.end()); + StringRef FullString = Collapsed; + std::pair<StringRef, StringRef> DotSplit = FullString.split('.'); + unsigned DotReg = MatchRegisterName(DotSplit.first.lower()); + if (DotReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) { + if (DotSplit.second.empty()) { + RegNo = DotReg; + EndLoc = Lexer.getLoc(); + if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc)) + return true; + return false; + } else { + RegNo = DotReg; + size_t First = RawString.find('.'); + StringRef DotString (RawString.data() + First, RawString.size() - First); + Lexer.UnLex(AsmToken(AsmToken::Identifier, DotString)); + EndLoc = Lexer.getLoc(); + if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc)) + return true; + return false; + } + } + std::pair<StringRef, StringRef> ColonSplit = StringRef(FullString).split(':'); + unsigned ColonReg = MatchRegisterName(ColonSplit.first.lower()); + if (ColonReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) { + Lexer.UnLex(Lookahead.back()); + Lookahead.pop_back(); + Lexer.UnLex(Lookahead.back()); + Lookahead.pop_back(); + RegNo = ColonReg; + EndLoc = Lexer.getLoc(); + if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc)) + return true; + return false; + } + while (!Lookahead.empty()) { + Lexer.UnLex(Lookahead.back()); + Lookahead.pop_back(); + } + return true; +} + +bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) { + if 
(previousEqual(Operands, 0, "call")) + return true; + if (previousEqual(Operands, 0, "jump")) + if (!getLexer().getTok().is(AsmToken::Colon)) + return true; + if (previousEqual(Operands, 0, "(") && previousIsLoop(Operands, 1)) + return true; + if (previousEqual(Operands, 1, ":") && previousEqual(Operands, 2, "jump") && + (previousEqual(Operands, 0, "nt") || previousEqual(Operands, 0, "t"))) + return true; + return false; +} + +bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) { + llvm::SmallVector<AsmToken, 4> Tokens; + MCAsmLexer &Lexer = getLexer(); + bool Done = false; + static char const * Comma = ","; + do { + Tokens.emplace_back (Lexer.getTok()); + Lexer.Lex(); + switch (Tokens.back().getKind()) + { + case AsmToken::TokenKind::Hash: + if (Tokens.size () > 1) + if ((Tokens.end () - 2)->getKind() == AsmToken::TokenKind::Plus) { + Tokens.insert(Tokens.end() - 2, + AsmToken(AsmToken::TokenKind::Comma, Comma)); + Done = true; + } + break; + case AsmToken::TokenKind::RCurly: + case AsmToken::TokenKind::EndOfStatement: + case AsmToken::TokenKind::Eof: + Done = true; + break; + default: + break; + } + } while (!Done); + while (!Tokens.empty()) { + Lexer.UnLex(Tokens.back()); + Tokens.pop_back(); + } + return getParser().parseExpression(Expr); +} + +bool HexagonAsmParser::parseExpressionOrOperand(OperandVector &Operands) { + if (implicitExpressionLocation(Operands)) { + MCAsmParser &Parser = getParser(); + SMLoc Loc = Parser.getLexer().getLoc(); + std::unique_ptr<HexagonOperand> Expr = + HexagonOperand::CreateImm(nullptr, Loc, Loc); + MCExpr const *& Val = Expr->Imm.Val; + Operands.push_back(std::move(Expr)); + return parseExpression(Val); + } + return parseOperand(Operands); +} + +/// Parse an instruction. +bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + MCAsmLexer &Lexer = getLexer(); + while (true) { + AsmToken const &Token = Parser.getTok(); + switch (Token.getKind()) { + case AsmToken::EndOfStatement: { + Lexer.Lex(); + return false; + } + case AsmToken::LCurly: { + if (!Operands.empty()) + return true; + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + return false; + } + case AsmToken::RCurly: { + if (Operands.empty()) { + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + } + return false; + } + case AsmToken::Comma: { + Lexer.Lex(); + continue; + } + case AsmToken::EqualEqual: + case AsmToken::ExclaimEqual: + case AsmToken::GreaterEqual: + case AsmToken::GreaterGreater: + case AsmToken::LessEqual: + case AsmToken::LessLess: { + Operands.push_back(HexagonOperand::CreateToken( + Token.getString().substr(0, 1), Token.getLoc())); + Operands.push_back(HexagonOperand::CreateToken( + Token.getString().substr(1, 1), Token.getLoc())); + Lexer.Lex(); + continue; + } + case AsmToken::Hash: { + bool MustNotExtend = false; + bool ImplicitExpression = implicitExpressionLocation(Operands); + std::unique_ptr<HexagonOperand> Expr = HexagonOperand::CreateImm( + nullptr, Lexer.getLoc(), Lexer.getLoc()); + if (!ImplicitExpression) + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + bool MustExtend = false; + bool HiOnly = false; + bool LoOnly = false; + if (Lexer.is(AsmToken::Hash)) { + Lexer.Lex(); + MustExtend = true; + } else if (ImplicitExpression) + MustNotExtend = true; + AsmToken const &Token = Parser.getTok(); + if (Token.is(AsmToken::Identifier)) { + StringRef String 
= Token.getString(); + AsmToken IDToken = Token; + if (String.lower() == "hi") { + HiOnly = true; + } else if (String.lower() == "lo") { + LoOnly = true; + } + if (HiOnly || LoOnly) { + AsmToken LParen = Lexer.peekTok(); + if (!LParen.is(AsmToken::LParen)) { + HiOnly = false; + LoOnly = false; + } else { + Lexer.Lex(); + } + } + } + if (parseExpression(Expr->Imm.Val)) + return true; + int64_t Value; + MCContext &Context = Parser.getContext(); + assert(Expr->Imm.Val != nullptr); + if (Expr->Imm.Val->evaluateAsAbsolute(Value)) { + if (HiOnly) + Expr->Imm.Val = MCBinaryExpr::createLShr( + Expr->Imm.Val, MCConstantExpr::create(16, Context), Context); + if (HiOnly || LoOnly) + Expr->Imm.Val = MCBinaryExpr::createAnd( + Expr->Imm.Val, MCConstantExpr::create(0xffff, Context), Context); + } + if (MustNotExtend) + Expr->Imm.Val = HexagonNoExtendOperand::Create(Expr->Imm.Val, Context); + Expr->Imm.MustExtend = MustExtend; + Operands.push_back(std::move(Expr)); + continue; + } + default: + break; + } + if (parseExpressionOrOperand(Operands)) + return true; + } +} + +bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, + AsmToken ID, + OperandVector &Operands) { + getLexer().UnLex(ID); + return parseInstruction(Operands); +} + +namespace { +MCInst makeCombineInst(int opCode, MCOperand &Rdd, + MCOperand &MO1, MCOperand &MO2) { + MCInst TmpInst; + TmpInst.setOpcode(opCode); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(MO1); + TmpInst.addOperand(MO2); + + return TmpInst; +} +} + +// Define this matcher function after the auto-generated include so we +// have the match class enum definitions. +unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, + unsigned Kind) { + HexagonOperand *Op = static_cast<HexagonOperand *>(&AsmOp); + + switch (Kind) { + case MCK_0: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 0 + ? Match_Success + : Match_InvalidOperand; + } + case MCK_1: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 1 + ? Match_Success + : Match_InvalidOperand; + } + case MCK__MINUS_1: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == -1 + ? 
Match_Success + : Match_InvalidOperand; + } + } + if (Op->Kind == HexagonOperand::Token && Kind != InvalidMatchClass) { + StringRef myStringRef = StringRef(Op->Tok.Data, Op->Tok.Length); + if (matchTokenString(myStringRef.lower()) == (MatchClassKind)Kind) + return Match_Success; + if (matchTokenString(myStringRef.upper()) == (MatchClassKind)Kind) + return Match_Success; + } + + DEBUG(dbgs() << "Unmatched Operand:"); + DEBUG(Op->dump()); + DEBUG(dbgs() << "\n"); + + return Match_InvalidOperand; +} + +void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) { + std::string errStr; + raw_string_ostream ES(errStr); + ES << "value " << Val << "(" << format_hex(Val, 0) << ") out of range: "; + if (Max >= 0) + ES << "0-" << Max; + else + ES << Max << "-" << (-Max - 1); + Error(IDLoc, ES.str().c_str()); +} + +int HexagonAsmParser::processInstruction(MCInst &Inst, + OperandVector const &Operands, + SMLoc IDLoc, bool &MustExtend) { + MCContext &Context = getParser().getContext(); + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + std::string r = "r"; + std::string v = "v"; + std::string Colon = ":"; + + bool is32bit = false; // used to distinguish between CONST32 and CONST64 + switch (Inst.getOpcode()) { + default: + break; + + case Hexagon::M4_mpyrr_addr: + case Hexagon::S4_addi_asl_ri: + case Hexagon::S4_addi_lsr_ri: + case Hexagon::S4_andi_asl_ri: + case Hexagon::S4_andi_lsr_ri: + case Hexagon::S4_ori_asl_ri: + case Hexagon::S4_ori_lsr_ri: + case Hexagon::S4_or_andix: + case Hexagon::S4_subi_asl_ri: + case Hexagon::S4_subi_lsr_ri: { + MCOperand &Ry = Inst.getOperand(0); + MCOperand &src = Inst.getOperand(2); + if (RI->getEncodingValue(Ry.getReg()) != RI->getEncodingValue(src.getReg())) + return Match_InvalidOperand; + break; + } + + case Hexagon::C2_cmpgei: { + MCOperand &MO = Inst.getOperand(2); + MO.setExpr(MCBinaryExpr::createSub( + MO.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::C2_cmpgti); + break; + } + + case Hexagon::C2_cmpgeui: { + MCOperand &MO = Inst.getOperand(2); + int64_t Value; + bool Success = MO.getExpr()->evaluateAsAbsolute(Value); + (void)Success; + assert(Success && "Assured by matcher"); + if (Value == 0) { + MCInst TmpInst; + MCOperand &Pd = Inst.getOperand(0); + MCOperand &Rt = Inst.getOperand(1); + TmpInst.setOpcode(Hexagon::C2_cmpeq); + TmpInst.addOperand(Pd); + TmpInst.addOperand(Rt); + TmpInst.addOperand(Rt); + Inst = TmpInst; + } else { + MO.setExpr(MCBinaryExpr::createSub( + MO.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::C2_cmpgtui); + } + break; + } + case Hexagon::J2_loop1r: + case Hexagon::J2_loop1i: + case Hexagon::J2_loop0r: + case Hexagon::J2_loop0i: { + MCOperand &MO = Inst.getOperand(0); + // Loop has different opcodes for extended vs not extended, but we should + // not use the other opcode as it is a legacy artifact of TD files. + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) { + // if the operand can fit within a 7:2 field + if (Value < (1 << 8) && Value >= -(1 << 8)) { + SMLoc myLoc = Operands[2]->getStartLoc(); + // # is left in startLoc in the case of ## + // If '##' found then force extension. + if (*myLoc.getPointer() == '#') { + MustExtend = true; + break; + } + } else { + // If immediate and out of 7:2 range. 
+ MustExtend = true; + } + } + break; + } + + // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" + case Hexagon::A2_tfrp: { + MCOperand &MO = Inst.getOperand(1); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode(Hexagon::A2_combinew); + break; + } + + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: { + MCOperand &MO = Inst.getOperand(2); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) + ? Hexagon::C2_ccombinewt + : Hexagon::C2_ccombinewf); + break; + } + case Hexagon::A2_tfrptnew: + case Hexagon::A2_tfrpfnew: { + MCOperand &MO = Inst.getOperand(2); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) + ? Hexagon::C2_ccombinewnewt + : Hexagon::C2_ccombinewnewf); + break; + } + + // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) " + case Hexagon::CONST32: + case Hexagon::CONST32_Float_Real: + case Hexagon::CONST32_Int_Real: + case Hexagon::FCONST32_nsdata: + is32bit = true; + // Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) " + case Hexagon::CONST64_Float_Real: + case Hexagon::CONST64_Int_Real: + + // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) + if (!Parser.getStreamer().hasRawTextSupport()) { + MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer()); + MCOperand &MO_1 = Inst.getOperand(1); + MCOperand &MO_0 = Inst.getOperand(0); + + // push section onto section stack + MES->PushSection(); + + std::string myCharStr; + MCSectionELF *mySection; + + // check if this as an immediate or a symbol + int64_t Value; + bool Absolute = MO_1.getExpr()->evaluateAsAbsolute(Value); + if (Absolute) { + // Create a new section - one for each constant + // Some or all of the zeros are replaced with the given immediate. 
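        // E.g. (illustrative) a 32-bit constant of 0x1234 would name the
        // section .gnu.linkonce.l4.CONST_00001234.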
+ if (is32bit) { + std::string myImmStr = utohexstr(static_cast<uint32_t>(Value)); + myCharStr = StringRef(".gnu.linkonce.l4.CONST_00000000") + .drop_back(myImmStr.size()) + .str() + + myImmStr; + } else { + std::string myImmStr = utohexstr(Value); + myCharStr = StringRef(".gnu.linkonce.l8.CONST_0000000000000000") + .drop_back(myImmStr.size()) + .str() + + myImmStr; + } + + mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); + } else if (MO_1.isExpr()) { + // .lita - for expressions + myCharStr = ".lita"; + mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); + } else + llvm_unreachable("unexpected type of machine operand!"); + + MES->SwitchSection(mySection); + unsigned byteSize = is32bit ? 4 : 8; + getStreamer().EmitCodeAlignment(byteSize, byteSize); + + MCSymbol *Sym; + + // for symbols, get rid of prepended ".gnu.linkonce.lx." + + // emit symbol if needed + if (Absolute) { + Sym = getContext().getOrCreateSymbol(StringRef(myCharStr.c_str() + 16)); + if (Sym->isUndefined()) { + getStreamer().EmitLabel(Sym); + getStreamer().EmitSymbolAttribute(Sym, MCSA_Global); + getStreamer().EmitIntValue(Value, byteSize); + } + } else if (MO_1.isExpr()) { + const char *StringStart = 0; + const char *StringEnd = 0; + if (*Operands[4]->getStartLoc().getPointer() == '#') { + StringStart = Operands[5]->getStartLoc().getPointer(); + StringEnd = Operands[6]->getStartLoc().getPointer(); + } else { // no pound + StringStart = Operands[4]->getStartLoc().getPointer(); + StringEnd = Operands[5]->getStartLoc().getPointer(); + } + + unsigned size = StringEnd - StringStart; + std::string DotConst = ".CONST_"; + Sym = getContext().getOrCreateSymbol(DotConst + + StringRef(StringStart, size)); + + if (Sym->isUndefined()) { + // case where symbol is not yet defined: emit symbol + getStreamer().EmitLabel(Sym); + getStreamer().EmitSymbolAttribute(Sym, MCSA_Local); + getStreamer().EmitValue(MO_1.getExpr(), 4); + } + } else + llvm_unreachable("unexpected type of machine operand!"); + + MES->PopSection(); + + if (Sym) { + MCInst TmpInst; + if (is32bit) // 32 bit + TmpInst.setOpcode(Hexagon::L2_loadrigp); + else // 64 bit + TmpInst.setOpcode(Hexagon::L2_loadrdgp); + + TmpInst.addOperand(MO_0); + TmpInst.addOperand( + MCOperand::createExpr(MCSymbolRefExpr::create(Sym, getContext()))); + Inst = TmpInst; + } + } + break; + + // Translate a "$Rdd = #-imm" to "$Rdd = combine(#[-1,0], #-imm)" + case Hexagon::A2_tfrpi: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO = Inst.getOperand(1); + int64_t Value; + int sVal = (MO.getExpr()->evaluateAsAbsolute(Value) && Value < 0) ? 
-1 : 0; + MCOperand imm(MCOperand::createExpr(MCConstantExpr::create(sVal, Context))); + Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, imm, MO); + break; + } + + // Translate a "$Rdd = [#]#imm" to "$Rdd = combine(#, [#]#imm)" + case Hexagon::TFRI64_V4: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO = Inst.getOperand(1); + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) { + unsigned long long u64 = Value; + signed int s8 = (u64 >> 32) & 0xFFFFFFFF; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + MCOperand imm(MCOperand::createExpr( + MCConstantExpr::create(s8, Context))); // upper 32 + MCOperand imm2(MCOperand::createExpr( + MCConstantExpr::create(u64 & 0xFFFFFFFF, Context))); // lower 32 + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, imm2); + } else { + MCOperand imm(MCOperand::createExpr( + MCConstantExpr::create(0, Context))); // upper 32 + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, MO); + } + break; + } + + // Handle $Rdd = combine(##imm, #imm)" + case Hexagon::TFRI64_V2_ext: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO1 = Inst.getOperand(1); + MCOperand &MO2 = Inst.getOperand(2); + int64_t Value; + if (MO2.getExpr()->evaluateAsAbsolute(Value)) { + int s8 = Value; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + } + Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, MO1, MO2); + break; + } + + // Handle $Rdd = combine(#imm, ##imm)" + case Hexagon::A4_combineii: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO1 = Inst.getOperand(1); + int64_t Value; + if (MO1.getExpr()->evaluateAsAbsolute(Value)) { + int s8 = Value; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + } + MCOperand &MO2 = Inst.getOperand(2); + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, MO1, MO2); + break; + } + + case Hexagon::S2_tableidxb_goodsyntax: { + Inst.setOpcode(Hexagon::S2_tableidxb); + break; + } + + case Hexagon::S2_tableidxh_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(1, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxh); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::S2_tableidxw_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(2, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxw); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::S2_tableidxd_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(3, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxd); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + 
TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::M2_mpyui: { + Inst.setOpcode(Hexagon::M2_mpyi); + break; + } + case Hexagon::M2_mpysmi: { + MCInst TmpInst; + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (!MustExtend) { + if (Value < 0 && Value > -256) { + Imm.setExpr(MCConstantExpr::create(Value * -1, Context)); + TmpInst.setOpcode(Hexagon::M2_mpysin); + } else if (Value < 256 && Value >= 0) + TmpInst.setOpcode(Hexagon::M2_mpysip); + else + return Match_InvalidOperand; + } else { + if (Value >= 0) + TmpInst.setOpcode(Hexagon::M2_mpysip); + else + return Match_InvalidOperand; + } + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm); + Inst = TmpInst; + break; + } + + case Hexagon::S2_asr_i_r_rnd_goodsyntax: { + MCOperand &Imm = Inst.getOperand(2); + MCInst TmpInst; + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { // convert to $Rd = $Rs + TmpInst.setOpcode(Hexagon::A2_tfr); + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd); + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm); + } + Inst = TmpInst; + break; + } + + case Hexagon::S2_asr_i_p_rnd_goodsyntax: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1]) + MCInst TmpInst; + unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + Rss.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. 
+ std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(Rss); + TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst = TmpInst; + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S2_asr_i_p_rnd); + } + break; + } + + case Hexagon::A4_boundscheck: { + MCOperand &Rs = Inst.getOperand(1); + unsigned int RegNum = RI->getEncodingValue(Rs.getReg()); + if (RegNum & 1) { // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2 + Inst.setOpcode(Hexagon::A4_boundscheck_hi); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } else { // raw:lo + Inst.setOpcode(Hexagon::A4_boundscheck_lo); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::A2_addsp: { + MCOperand &Rs = Inst.getOperand(1); + unsigned int RegNum = RI->getEncodingValue(Rs.getReg()); + if (RegNum & 1) { // Odd mapped to raw:hi + Inst.setOpcode(Hexagon::A2_addsph); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped raw:lo + Inst.setOpcode(Hexagon::A2_addspl); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::M2_vrcmpys_s1: { + MCOperand &Rt = Inst.getOperand(2); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to sat:raw:hi + Inst.setOpcode(Hexagon::M2_vrcmpys_s1_h); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped sat:raw:lo + Inst.setOpcode(Hexagon::M2_vrcmpys_s1_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::M2_vrcmpys_acc_s1: { + MCInst TmpInst; + MCOperand &Rxx = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(2); + MCOperand &Rt = Inst.getOperand(3); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to sat:raw:hi + TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped sat:raw:lo + TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + // Registers are in different positions + TmpInst.addOperand(Rxx); + TmpInst.addOperand(Rxx); + TmpInst.addOperand(Rss); + TmpInst.addOperand(Rt); + Inst = TmpInst; + break; + } + + case Hexagon::M2_vrcmpys_s1rp: { + MCOperand &Rt = Inst.getOperand(2); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to rnd:sat:raw:hi + Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h); + std::string Name = + r + llvm::utostr_32(RegNum) + 
Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped rnd:sat:raw:lo + Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::S5_asrhub_rnd_sat_goodsyntax: { + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) + Inst.setOpcode(Hexagon::S2_vsathub); + else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S5_asrhub_rnd_sat); + } + break; + } + + case Hexagon::S5_vasrhrnd_goodsyntax: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { + MCInst TmpInst; + unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + Rss.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(Rss); + TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst = TmpInst; + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S5_vasrhrnd); + } + break; + } + + case Hexagon::A2_not: { + MCInst TmpInst; + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.setOpcode(Hexagon::A2_subri); + TmpInst.addOperand(Rd); + TmpInst.addOperand( + MCOperand::createExpr(MCConstantExpr::create(-1, Context))); + TmpInst.addOperand(Rs); + Inst = TmpInst; + break; + } + } // switch + + return Match_Success; +} diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp index cb7e633fb82f..ea96eb0ee10a 100644 --- a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -868,7 +868,7 @@ void BT::visitNonBranch(const MachineInstr *MI) { continue; bool Changed = false; - if (!Eval || !ResMap.has(RD.Reg)) { + if (!Eval || ResMap.count(RD.Reg) == 0) { // Set to "ref" (aka "bottom"). uint16_t DefBW = ME.getRegBitWidth(RD); RegisterCell RefC = RegisterCell::self(RD.Reg, DefBW); @@ -951,11 +951,11 @@ void BT::visitBranchesFrom(const MachineInstr *BI) { // be processed. for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) { const MachineBasicBlock *SB = *I; - if (SB->isLandingPad()) + if (SB->isEHPad()) Targets.insert(SB); } if (FallsThrough) { - MachineFunction::const_iterator BIt = &B; + MachineFunction::const_iterator BIt = B.getIterator(); MachineFunction::const_iterator Next = std::next(BIt); if (Next != MF.end()) Targets.insert(&*Next); @@ -1005,7 +1005,7 @@ void BT::put(RegisterRef RR, const RegisterCell &RC) { // Replace all references to bits from OldRR with the corresponding bits // in NewRR. 
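The BitTracker hunks below also retire the custom CellMapType::has() wrapper in favor of the standard std::map membership idioms. The two spellings are equivalent, as this side-by-side sketch shows (the map's value type is elided to keep it self-contained):

    #include <map>

    typedef std::map<unsigned, int> CellMapType; // value type stands in for RegisterCell

    // Removed helper: find()-based membership test.
    static bool hasOld(const CellMapType &M, unsigned Reg) { return M.find(Reg) != M.end(); }
    // Replacement idiom: count() is 0 or 1 for a non-multi map, so the tests agree.
    static bool hasNew(const CellMapType &M, unsigned Reg) { return M.count(Reg) != 0; }

Dropping the helper also lets CellMapType become a plain typedef, as the BitTracker.h hunk further down does.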
void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { - assert(Map.has(OldRR.Reg) && "OldRR not present in map"); + assert(Map.count(OldRR.Reg) > 0 && "OldRR not present in map"); BitMask OM = ME.mask(OldRR.Reg, OldRR.Sub); BitMask NM = ME.mask(NewRR.Reg, NewRR.Sub); uint16_t OMB = OM.first(), OME = OM.last(); @@ -1104,9 +1104,9 @@ void BT::run() { } // If block end has been reached, add the fall-through edge to the queue. if (It == End) { - MachineFunction::const_iterator BIt = &B; + MachineFunction::const_iterator BIt = B.getIterator(); MachineFunction::const_iterator Next = std::next(BIt); - if (Next != MF.end()) { + if (Next != MF.end() && B.isSuccessor(&*Next)) { int ThisN = B.getNumber(); int NextN = Next->getNumber(); FlowQ.push(CFGEdge(ThisN, NextN)); diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm/lib/Target/Hexagon/BitTracker.h index ed002a794d66..959c8318fd60 100644 --- a/contrib/llvm/lib/Target/Hexagon/BitTracker.h +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.h @@ -36,9 +36,7 @@ struct BitTracker { typedef SetVector<const MachineBasicBlock *> BranchTargetList; - struct CellMapType : public std::map<unsigned,RegisterCell> { - bool has(unsigned Reg) const; - }; + typedef std::map<unsigned, RegisterCell> CellMapType; BitTracker(const MachineEvaluator &E, MachineFunction &F); ~BitTracker(); @@ -79,7 +77,6 @@ private: // Abstraction of a reference to bit at position Pos from a register Reg. struct BitTracker::BitRef { BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {} - BitRef(const BitRef &BR) : Reg(BR.Reg), Pos(BR.Pos) {} bool operator== (const BitRef &BR) const { // If Reg is 0, disregard Pos. return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos); @@ -146,7 +143,6 @@ struct BitTracker::BitValue { BitValue(ValueType T = Top) : Type(T) {} BitValue(bool B) : Type(B ? One : Zero) {} - BitValue(const BitValue &V) : Type(V.Type), RefI(V.RefI) {} BitValue(unsigned Reg, uint16_t Pos) : Type(Ref), RefI(Reg, Pos) {} bool operator== (const BitValue &V) const { @@ -279,11 +275,6 @@ struct BitTracker::RegisterCell { return !operator==(RC); } - const RegisterCell &operator=(const RegisterCell &RC) { - Bits = RC.Bits; - return *this; - } - // Generate a "ref" cell for the corresponding register. In the resulting // cell each bit will be described as being the same as the corresponding // bit in register Reg (i.e. the cell is "defined" by register Reg). @@ -344,11 +335,6 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) { return RC; } - -inline bool BitTracker::CellMapType::has(unsigned Reg) const { - return find(Reg) != end(); -} - // A class to evaluate target's instructions and update the cell maps. // This is used internally by the bit tracker. 
A target that wants to // utilize this should implement the evaluation functions (noted below) diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 9cc1e944d359..4a9c3413cb29 100644 --- a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -7,42 +7,45 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "hexagon-disassembler" + #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCChecker.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" - -#include "llvm/MC/MCContext.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonInstPrinter.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/MemoryObject.h" #include "llvm/Support/raw_ostream.h" -#include <array> +#include "llvm/Support/TargetRegistry.h" #include <vector> using namespace llvm; using namespace Hexagon; -#define DEBUG_TYPE "hexagon-disassembler" - -// Pull DecodeStatus and its enum values into the global namespace. -typedef llvm::MCDisassembler::DecodeStatus DecodeStatus; +typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { /// \brief Hexagon disassembler for all Hexagon platforms. class HexagonDisassembler : public MCDisassembler { public: + std::unique_ptr<MCInstrInfo const> const MCII; std::unique_ptr<MCInst *> CurrentBundle; - HexagonDisassembler(MCSubtargetInfo const &STI, MCContext &Ctx) - : MCDisassembler(STI, Ctx), CurrentBundle(new MCInst *) {} + HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, + MCInstrInfo const *MCII) + : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *) {} DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -52,23 +55,57 @@ public: ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &VStream, raw_ostream &CStream) const override; + + void adjustExtendedInstructions(MCInst &MCI, MCInst const &MCB) const; + void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const; }; } -static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, +// Forward declare these because the auto-generated code will reference them. +// Definitions are further down. 
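These declarations exist purely for ordering: HexagonGenDisassemblerTables.inc is #included further down, and the decoder loop TableGen emits there invokes each callback by name. A self-contained mock of the generated call shape (names and the 5-bit field position are illustrative only):

    // Stand-ins for the MC types so the mock compiles on its own.
    enum DecodeStatusSketch { SketchFail = 0, SketchSuccess = 3 };

    static DecodeStatusSketch DecodeIntRegsSketch(unsigned RegNo) {
      return RegNo < 32 ? SketchSuccess : SketchFail; // bounds-checked table lookup
    }

    static DecodeStatusSketch decodeSketch(unsigned insn) {
      unsigned tmp = insn & 0x1f;      // fieldFromInstruction(insn, 0, 5) in generated code
      return DecodeIntRegsSketch(tmp); // generated tables call the decoder by name
    }

Because the generated code only sees these names, any decoder a target adds (brtargetDecoder, unsignedImmDecoder, and so on) must be declared before the #include.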
+ +static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - void const *Decoder); + const void *Decoder); + +static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn); +static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn, + void const *Decoder); static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op, raw_ostream &os); -static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst); +static unsigned getRegFromSubinstEncoding(unsigned encoded_reg); + +static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, + uint64_t Address, const void *Decoder); static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, @@ -95,129 +132,19 @@ static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); - -static const uint16_t IntRegDecoderTable[] = { - Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, - Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, - Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, - Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, - Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, - Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, - Hexagon::R30, Hexagon::R31}; - -static const uint16_t PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, - Hexagon::P2, Hexagon::P3}; - -static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, - const uint16_t Table[], size_t Size) { - if (RegNo < Size) { - Inst.addOperand(MCOperand::createReg(Table[RegNo])); - return MCDisassembler::Success; - } else - return MCDisassembler::Fail; -} - -static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - unsigned Register = IntRegDecoderTable[RegNo]; - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { - static const uint16_t CtrlRegDecoderTable[] = { - Hexagon::SA0, 
Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, - Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7, - Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, - Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH}; - - if (RegNo >= sizeof(CtrlRegDecoderTable) / sizeof(CtrlRegDecoderTable[0])) - return MCDisassembler::Fail; - - if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister) - return MCDisassembler::Fail; - - unsigned Register = CtrlRegDecoderTable[RegNo]; - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { - static const uint16_t CtrlReg64DecoderTable[] = { - Hexagon::C1_0, Hexagon::NoRegister, Hexagon::C3_2, - Hexagon::NoRegister, Hexagon::NoRegister, Hexagon::NoRegister, - Hexagon::C7_6, Hexagon::NoRegister, Hexagon::C9_8, - Hexagon::NoRegister, Hexagon::C11_10, Hexagon::NoRegister, - Hexagon::CS, Hexagon::NoRegister, Hexagon::UPC, - Hexagon::NoRegister}; - - if (RegNo >= sizeof(CtrlReg64DecoderTable) / sizeof(CtrlReg64DecoderTable[0])) - return MCDisassembler::Fail; - - if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister) - return MCDisassembler::Fail; - - unsigned Register = CtrlReg64DecoderTable[RegNo]; - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { - unsigned Register = 0; - switch (RegNo) { - case 0: - Register = Hexagon::M0; - break; - case 1: - Register = Hexagon::M1; - break; - default: - return MCDisassembler::Fail; - } - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { - static const uint16_t DoubleRegDecoderTable[] = { - Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, - Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, - Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, - Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15}; - - return (DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable, - sizeof(DoubleRegDecoderTable))); -} - -static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { - if (RegNo > 3) - return MCDisassembler::Fail; - - unsigned Register = PredRegDecoderTable[RegNo]; - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} +static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); #include "HexagonGenDisassemblerTables.inc" -static MCDisassembler *createHexagonDisassembler(Target const &T, - MCSubtargetInfo const &STI, +static MCDisassembler *createHexagonDisassembler(const Target &T, + const MCSubtargetInfo &STI, MCContext &Ctx) { - return new HexagonDisassembler(STI, Ctx); + return new HexagonDisassembler(STI, Ctx, T.createMCInstrInfo()); } extern "C" void LLVMInitializeHexagonDisassembler() { @@ -235,8 +162,7 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Size = 0; *CurrentBundle = &MI; - MI.setOpcode(Hexagon::BUNDLE); - 
MI.addOperand(MCOperand::createImm(0)); + MI = HexagonMCInstrInfo::createBundle(); while (Result == Success && Complete == false) { if (Bytes.size() < HEXAGON_INSTR_SIZE) return MCDisassembler::Fail; @@ -246,7 +172,21 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Size += HEXAGON_INSTR_SIZE; Bytes = Bytes.slice(HEXAGON_INSTR_SIZE); } - return Result; + if(Result == MCDisassembler::Fail) + return Result; + HexagonMCChecker Checker (*MCII, STI, MI, MI, *getContext().getRegisterInfo()); + if(!Checker.check()) + return MCDisassembler::Fail; + return MCDisassembler::Success; +} + +namespace { +HexagonDisassembler const &disassembler(void const *Decoder) { + return *static_cast<HexagonDisassembler const *>(Decoder); +} +MCContext &contextFromDecoder(void const *Decoder) { + return disassembler(Decoder).getContext(); +} } DecodeStatus HexagonDisassembler::getSingleInstruction( @@ -255,8 +195,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( assert(Bytes.size() >= HEXAGON_INSTR_SIZE); uint32_t Instruction = - llvm::support::endian::read<uint32_t, llvm::support::little, - llvm::support::unaligned>(Bytes.data()); + (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0); auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB); if ((Instruction & HexagonII::INST_PARSE_MASK) == @@ -360,8 +299,8 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( MILow->setOpcode(opLow); MCInst *MIHigh = new (getContext()) MCInst; MIHigh->setOpcode(opHigh); - AddSubinstOperands(MILow, opLow, instLow); - AddSubinstOperands(MIHigh, opHigh, instHigh); + addSubinstOperands(MILow, opLow, instLow); + addSubinstOperands(MIHigh, opHigh, instHigh); // see ConvertToSubInst() in // lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -378,102 +317,774 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( // Calling the auto-generated decoder function. Result = decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI); + + // If a "standard" insn isn't found, check special cases. + if (MCDisassembler::Success != Result || + MI.getOpcode() == Hexagon::A4_ext) { + Result = decodeImmext(MI, Instruction, this); + if (MCDisassembler::Success != Result) { + Result = decodeSpecial(MI, Instruction); + } + } else { + // If the instruction is a compound instruction, register values will + // follow the duplex model, so the register values in the MCInst are + // incorrect. If the instruction is a compound, loop through the + // operands and change registers appropriately.
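The fix-up loop in the next hunk leans on the duplex register encoding defined by getRegFromSubinstEncoding() near the end of this file: a compound's 4-bit register fields can only name r0-r7 and r16-r23, so the raw register numbers the generic decoder produced have to be remapped. A compressed sketch of that mapping (returning a plain register number rather than a Hexagon::R enum value, to stay self-contained):

    // Duplex/compound 4-bit register fields address a split bank:
    // encodings 0-7 name r0-r7, encodings 8-15 name r16-r23.
    static unsigned subinstRegNumber(unsigned Encoded) {
      return Encoded < 8 ? Encoded      // r0..r7
                         : Encoded + 8; // r16..r23, callers guarantee Encoded < 16
    }

Encodings of 16 and above do not exist, which is why the real function falls back to Hexagon::NoRegister for them.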
+ if (llvm::HexagonMCInstrInfo::getType(*MCII, MI) == + HexagonII::TypeCOMPOUND) { + for (MCInst::iterator i = MI.begin(), last = MI.end(); i < last; ++i) { + if (i->isReg()) { + unsigned reg = i->getReg() - Hexagon::R0; + i->setReg(getRegFromSubinstEncoding(reg)); + } + } + } + } + } + + if (HexagonMCInstrInfo::isNewValue(*MCII, MI)) { + unsigned OpIndex = HexagonMCInstrInfo::getNewValueOp(*MCII, MI); + MCOperand &MCO = MI.getOperand(OpIndex); + assert(MCO.isReg() && "New value consumers must be registers"); + unsigned Register = + getContext().getRegisterInfo()->getEncodingValue(MCO.getReg()); + if ((Register & 0x6) == 0) + // HexagonPRM 10.11 Bit 1-2 == 0 is reserved + return MCDisassembler::Fail; + unsigned Lookback = (Register & 0x6) >> 1; + unsigned Offset = 1; + bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI); + auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); + auto i = Instructions.end() - 1; + for (auto n = Instructions.begin() - 1;; --i, ++Offset) { + if (i == n) + // Couldn't find producer + return MCDisassembler::Fail; + if (Vector && !HexagonMCInstrInfo::isVector(*MCII, *i->getInst())) + // Skip scalars when calculating distances for vectors + ++Lookback; + if (HexagonMCInstrInfo::isImmext(*i->getInst())) + ++Lookback; + if (Offset == Lookback) + break; + } + auto const &Inst = *i->getInst(); + bool SubregBit = (Register & 0x1) != 0; + if (SubregBit && HexagonMCInstrInfo::hasNewValue2(*MCII, Inst)) { + // If subreg bit is set we're selecting the second produced newvalue + unsigned Producer = + HexagonMCInstrInfo::getNewValueOperand2(*MCII, Inst).getReg(); + assert(Producer != Hexagon::NoRegister); + MCO.setReg(Producer); + } else if (HexagonMCInstrInfo::hasNewValue(*MCII, Inst)) { + unsigned Producer = + HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg(); + if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15) + Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0; + else if (SubregBit) + // Subreg bit should not be set for non-doublevector newvalue producers + return MCDisassembler::Fail; + assert(Producer != Hexagon::NoRegister); + MCO.setReg(Producer); + } else + return MCDisassembler::Fail; } + adjustExtendedInstructions(MI, MCB); + MCInst const *Extender = + HexagonMCInstrInfo::extenderForIndex(MCB, + HexagonMCInstrInfo::bundleSize(MCB)); + if(Extender != nullptr) { + MCInst const & Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI) ? + *MI.getOperand(1).getInst() : MI; + if (!HexagonMCInstrInfo::isExtendable(*MCII, Inst) && + !HexagonMCInstrInfo::isExtended(*MCII, Inst)) + return MCDisassembler::Fail; + } return Result; } +void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI, + MCInst const &MCB) const { + if (!HexagonMCInstrInfo::hasExtenderForIndex( + MCB, HexagonMCInstrInfo::bundleSize(MCB))) { + unsigned opcode; + // This code is used by the disassembler to disambiguate between GP + // relative and absolute addressing instructions, since they both have + // the same encoding bits. However, an absolute addressing instruction + // must follow an immediate extender. The disassembler always selects + // the absolute addressing instruction first and uses this code to + // change it into a GP relative instruction in the absence of the + // corresponding immediate extender.
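Before the opcode switch, one worked example for the new-value producer search above, since the bit packing is easy to misread: bit 0 of the operand's register encoding selects the second value of a dual producer, and bits 2:1 give the distance to walk back through the bundle (the field value below is illustrative, not taken from a real encoding):

    // Suppose a consumer's new-value register field decodes to 0b101.
    static void newValueLookbackExample() {
      unsigned Register = 0x5;                   // the 3-bit new-value operand field
      unsigned Lookback = (Register & 0x6) >> 1; // = 2: producer is two instructions back
      bool SubregBit = (Register & 0x1) != 0;    // = true: take the producer's second value
      (void)Lookback; (void)SubregBit;
    }

Bits 2:1 equal to zero are reserved (hence the early MCDisassembler::Fail), and while walking back, immediate extenders never count toward the distance; for vector consumers, scalar instructions are skipped as well.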
+ switch (MCI.getOpcode()) { + case Hexagon::S2_storerbabs: + opcode = Hexagon::S2_storerbgp; + break; + case Hexagon::S2_storerhabs: + opcode = Hexagon::S2_storerhgp; + break; + case Hexagon::S2_storerfabs: + opcode = Hexagon::S2_storerfgp; + break; + case Hexagon::S2_storeriabs: + opcode = Hexagon::S2_storerigp; + break; + case Hexagon::S2_storerbnewabs: + opcode = Hexagon::S2_storerbnewgp; + break; + case Hexagon::S2_storerhnewabs: + opcode = Hexagon::S2_storerhnewgp; + break; + case Hexagon::S2_storerinewabs: + opcode = Hexagon::S2_storerinewgp; + break; + case Hexagon::S2_storerdabs: + opcode = Hexagon::S2_storerdgp; + break; + case Hexagon::L4_loadrb_abs: + opcode = Hexagon::L2_loadrbgp; + break; + case Hexagon::L4_loadrub_abs: + opcode = Hexagon::L2_loadrubgp; + break; + case Hexagon::L4_loadrh_abs: + opcode = Hexagon::L2_loadrhgp; + break; + case Hexagon::L4_loadruh_abs: + opcode = Hexagon::L2_loadruhgp; + break; + case Hexagon::L4_loadri_abs: + opcode = Hexagon::L2_loadrigp; + break; + case Hexagon::L4_loadrd_abs: + opcode = Hexagon::L2_loadrdgp; + break; + default: + opcode = MCI.getOpcode(); + } + MCI.setOpcode(opcode); + } +} + +namespace llvm { +extern const MCInstrDesc HexagonInsts[]; +} + +static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, + ArrayRef<MCPhysReg> Table) { + if (RegNo < Table.size()) { + Inst.addOperand(MCOperand::createReg(Table[RegNo])); + return MCDisassembler::Success; + } + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + static const MCPhysReg IntRegDecoderTable[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, + Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, + Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, + Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, + Hexagon::R30, Hexagon::R31}; + + return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable); +} + +static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecRegDecoderTable[] = { + Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4, + Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9, + Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14, + Hexagon::V15, Hexagon::V16, Hexagon::V17, Hexagon::V18, Hexagon::V19, + Hexagon::V20, Hexagon::V21, Hexagon::V22, Hexagon::V23, Hexagon::V24, + Hexagon::V25, Hexagon::V26, Hexagon::V27, Hexagon::V28, Hexagon::V29, + Hexagon::V30, Hexagon::V31}; + + return DecodeRegisterClass(Inst, RegNo, VecRegDecoderTable); +} + +static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg DoubleRegDecoderTable[] = { + Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, + Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, + Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, + Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15}; + + return DecodeRegisterClass(Inst, RegNo >> 1, 
DoubleRegDecoderTable); +} + +static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecDblRegDecoderTable[] = { + Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, + Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7, + Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11, + Hexagon::W12, Hexagon::W13, Hexagon::W14, Hexagon::W15}; + + return (DecodeRegisterClass(Inst, RegNo >> 1, VecDblRegDecoderTable)); +} + +static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, + Hexagon::P2, Hexagon::P3}; + + return DecodeRegisterClass(Inst, RegNo, PredRegDecoderTable); +} + +static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecPredRegDecoderTable[] = {Hexagon::Q0, Hexagon::Q1, + Hexagon::Q2, Hexagon::Q3}; + + return DecodeRegisterClass(Inst, RegNo, VecPredRegDecoderTable); +} + +static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg CtrlRegDecoderTable[] = { + Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, + Hexagon::P3_0, Hexagon::C5, Hexagon::C6, Hexagon::C7, + Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, + Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPC + }; + + if (RegNo >= array_lengthof(CtrlRegDecoderTable)) + return MCDisassembler::Fail; + + if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = CtrlRegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg CtrlReg64DecoderTable[] = { + Hexagon::C1_0, Hexagon::NoRegister, + Hexagon::C3_2, Hexagon::NoRegister, + Hexagon::C7_6, Hexagon::NoRegister, + Hexagon::C9_8, Hexagon::NoRegister, + Hexagon::C11_10, Hexagon::NoRegister, + Hexagon::CS, Hexagon::NoRegister, + Hexagon::UPC, Hexagon::NoRegister + }; + + if (RegNo >= array_lengthof(CtrlReg64DecoderTable)) + return MCDisassembler::Fail; + + if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = CtrlReg64DecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + unsigned Register = 0; + switch (RegNo) { + case 0: + Register = Hexagon::M0; + break; + case 1: + Register = Hexagon::M1; + break; + default: + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +namespace { +uint32_t fullValue(MCInstrInfo const &MCII, + MCInst &MCB, + MCInst &MI, + int64_t Value) { + MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex( + MCB, HexagonMCInstrInfo::bundleSize(MCB)); + if(!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI)) + return Value; + unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI); + uint32_t Lower6 = static_cast<uint32_t>(Value >> Alignment) & 0x3f; + int64_t Bits; + bool Success = 
Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits); + assert(Success);(void)Success; + uint32_t Upper26 = static_cast<uint32_t>(Bits); + uint32_t Operand = Upper26 | Lower6; + return Operand; +} +template <size_t T> +void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + int64_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, SignExtend64<T>(tmp)); + int64_t Extended = SignExtend64<32>(FullValue); + HexagonMCInstrInfo::addConstant(MI, Extended, + Disassembler.getContext()); +} +} + +static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, + const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + int64_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, tmp); + assert(FullValue >= 0 && "Negative in unsigned decoder"); + HexagonMCInstrInfo::addConstant(MI, FullValue, Disassembler.getContext()); + return MCDisassembler::Success; +} + static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<16>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<16>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<12>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<12>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<11>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<11>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<12>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + HexagonMCInstrInfo::addConstant(MI, SignExtend64<12>(tmp), contextFromDecoder(Decoder)); return MCDisassembler::Success; } static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<13>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<13>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<14>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<14>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<10>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<10>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<8>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<8>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<6>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<6>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t 
/*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<4>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<4>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<5>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<5>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<6>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<6>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<7>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<7>(MI, tmp, Decoder); return MCDisassembler::Success; } +static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<10>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<19>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +// custom decoder for various jump/call immediates +static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI); + // r13_2 is not extendable, so if there are no extent bits, it's r13_2 + if (Bits == 0) + Bits = 15; + uint32_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, SignExtend64(tmp, Bits)); + int64_t Extended = SignExtend64<32>(FullValue) + Address; + if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, + 0, 4)) + HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext()); + return MCDisassembler::Success; +} + +// Addressing mode dependent load store opcode map. +// - If an insn is preceded by an extender the address is absolute. +// - memw(##symbol) = r0 +// - If an insn is not preceded by an extender the address is GP relative. +// - memw(gp + #symbol) = r0 +// Please note that the instructions must be ordered in the descending order +// of their opcode. 
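decodeSpecial() scans these tables with a subsumption test rather than an equality test: an entry matches when every bit set in its pattern is also set in the instruction word, which is why the descending-opcode ordering called out above matters (a more specific pattern must be tried before the patterns it subsumes). The scan reduces to this sketch (the function name is illustrative):

    #include <cstddef>
    #include <cstdint>

    // Return the LLVM opcode (column 0) of the first entry whose pattern bits
    // (column 1) are all present in insn, or 0 when nothing matches.
    static unsigned matchFirstPattern(uint32_t insn, const unsigned (*Table)[2], size_t N) {
      for (size_t i = 0; i < N; ++i)
        if ((insn & Table[i][1]) == Table[i][1])
          return Table[i][0];
      return 0;
    }

The two tables that follow feed exactly this loop, one for the conditional-store class and one for the absolute load/store class.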
+// HexagonII::INST_ICLASS_ST +static const unsigned int StoreConditionalOpcodeData[][2] = { + {S4_pstorerdfnew_abs, 0xafc02084}, + {S4_pstorerdtnew_abs, 0xafc02080}, + {S4_pstorerdf_abs, 0xafc00084}, + {S4_pstorerdt_abs, 0xafc00080}, + {S4_pstorerinewfnew_abs, 0xafa03084}, + {S4_pstorerinewtnew_abs, 0xafa03080}, + {S4_pstorerhnewfnew_abs, 0xafa02884}, + {S4_pstorerhnewtnew_abs, 0xafa02880}, + {S4_pstorerbnewfnew_abs, 0xafa02084}, + {S4_pstorerbnewtnew_abs, 0xafa02080}, + {S4_pstorerinewf_abs, 0xafa01084}, + {S4_pstorerinewt_abs, 0xafa01080}, + {S4_pstorerhnewf_abs, 0xafa00884}, + {S4_pstorerhnewt_abs, 0xafa00880}, + {S4_pstorerbnewf_abs, 0xafa00084}, + {S4_pstorerbnewt_abs, 0xafa00080}, + {S4_pstorerifnew_abs, 0xaf802084}, + {S4_pstoreritnew_abs, 0xaf802080}, + {S4_pstorerif_abs, 0xaf800084}, + {S4_pstorerit_abs, 0xaf800080}, + {S4_pstorerhfnew_abs, 0xaf402084}, + {S4_pstorerhtnew_abs, 0xaf402080}, + {S4_pstorerhf_abs, 0xaf400084}, + {S4_pstorerht_abs, 0xaf400080}, + {S4_pstorerbfnew_abs, 0xaf002084}, + {S4_pstorerbtnew_abs, 0xaf002080}, + {S4_pstorerbf_abs, 0xaf000084}, + {S4_pstorerbt_abs, 0xaf000080}}; +// HexagonII::INST_ICLASS_LD + +// HexagonII::INST_ICLASS_LD_ST_2 +static unsigned int LoadStoreOpcodeData[][2] = {{L4_loadrd_abs, 0x49c00000}, + {L4_loadri_abs, 0x49800000}, + {L4_loadruh_abs, 0x49600000}, + {L4_loadrh_abs, 0x49400000}, + {L4_loadrub_abs, 0x49200000}, + {L4_loadrb_abs, 0x49000000}, + {S2_storerdabs, 0x48c00000}, + {S2_storerinewabs, 0x48a01000}, + {S2_storerhnewabs, 0x48a00800}, + {S2_storerbnewabs, 0x48a00000}, + {S2_storeriabs, 0x48800000}, + {S2_storerfabs, 0x48600000}, + {S2_storerhabs, 0x48400000}, + {S2_storerbabs, 0x48000000}}; +static const size_t NumCondS = array_lengthof(StoreConditionalOpcodeData); +static const size_t NumLS = array_lengthof(LoadStoreOpcodeData); + +static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { + + unsigned MachineOpcode = 0; + unsigned LLVMOpcode = 0; + + if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_ST) { + for (size_t i = 0; i < NumCondS; ++i) { + if ((insn & StoreConditionalOpcodeData[i][1]) == + StoreConditionalOpcodeData[i][1]) { + MachineOpcode = StoreConditionalOpcodeData[i][1]; + LLVMOpcode = StoreConditionalOpcodeData[i][0]; + break; + } + } + } + if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_LD_ST_2) { + for (size_t i = 0; i < NumLS; ++i) { + if ((insn & LoadStoreOpcodeData[i][1]) == LoadStoreOpcodeData[i][1]) { + MachineOpcode = LoadStoreOpcodeData[i][1]; + LLVMOpcode = LoadStoreOpcodeData[i][0]; + break; + } + } + } + + if (MachineOpcode) { + unsigned Value = 0; + unsigned shift = 0; + MI.setOpcode(LLVMOpcode); + // Remove the parse bits from the insn. 
+ insn &= ~HexagonII::INST_PARSE_MASK; + + switch (LLVMOpcode) { + default: + return MCDisassembler::Fail; + break; + + case Hexagon::S4_pstorerdf_abs: + case Hexagon::S4_pstorerdt_abs: + case Hexagon::S4_pstorerdfnew_abs: + case Hexagon::S4_pstorerdtnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Rtt + Value = (insn >> 8) & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::S4_pstorerbnewf_abs: + case Hexagon::S4_pstorerbnewt_abs: + case Hexagon::S4_pstorerbnewfnew_abs: + case Hexagon::S4_pstorerbnewtnew_abs: + case Hexagon::S4_pstorerhnewf_abs: + case Hexagon::S4_pstorerhnewt_abs: + case Hexagon::S4_pstorerhnewfnew_abs: + case Hexagon::S4_pstorerhnewtnew_abs: + case Hexagon::S4_pstorerinewf_abs: + case Hexagon::S4_pstorerinewt_abs: + case Hexagon::S4_pstorerinewfnew_abs: + case Hexagon::S4_pstorerinewtnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Nt + Value = (insn >> 8) & UINT64_C(7); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::S4_pstorerbf_abs: + case Hexagon::S4_pstorerbt_abs: + case Hexagon::S4_pstorerbfnew_abs: + case Hexagon::S4_pstorerbtnew_abs: + case Hexagon::S4_pstorerhf_abs: + case Hexagon::S4_pstorerht_abs: + case Hexagon::S4_pstorerhfnew_abs: + case Hexagon::S4_pstorerhtnew_abs: + case Hexagon::S4_pstorerif_abs: + case Hexagon::S4_pstorerit_abs: + case Hexagon::S4_pstorerifnew_abs: + case Hexagon::S4_pstoreritnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Rt + Value = (insn >> 8) & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::L4_ploadrdf_abs: + case Hexagon::L4_ploadrdt_abs: + case Hexagon::L4_ploadrdfnew_abs: + case Hexagon::L4_ploadrdtnew_abs: { + // op: Rdd + Value = insn & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + // op: Pt + Value = ((insn >> 9) & UINT64_C(3)); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = ((insn >> 15) & UINT64_C(62)); + Value |= ((insn >> 8) & UINT64_C(1)); + MI.addOperand(MCOperand::createImm(Value)); + break; + } + + case Hexagon::L4_ploadrbf_abs: + case Hexagon::L4_ploadrbt_abs: + case Hexagon::L4_ploadrbfnew_abs: + case Hexagon::L4_ploadrbtnew_abs: + case Hexagon::L4_ploadrhf_abs: + case Hexagon::L4_ploadrht_abs: + case Hexagon::L4_ploadrhfnew_abs: + case Hexagon::L4_ploadrhtnew_abs: + case Hexagon::L4_ploadrubf_abs: + case Hexagon::L4_ploadrubt_abs: + case Hexagon::L4_ploadrubfnew_abs: + case Hexagon::L4_ploadrubtnew_abs: + case Hexagon::L4_ploadruhf_abs: + case Hexagon::L4_ploadruht_abs: + case Hexagon::L4_ploadruhfnew_abs: + case Hexagon::L4_ploadruhtnew_abs: + case Hexagon::L4_ploadrif_abs: + case Hexagon::L4_ploadrit_abs: + case Hexagon::L4_ploadrifnew_abs: + case Hexagon::L4_ploadritnew_abs: + // op: Rd + Value = insn & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + // op: Pt + Value = (insn >> 9) & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 
+ Value = (insn >> 15) & UINT64_C(62); + Value |= (insn >> 8) & UINT64_C(1); + MI.addOperand(MCOperand::createImm(Value)); + break; + + // op: g16_2 + case (Hexagon::L4_loadri_abs): + ++shift; + // op: g16_1 + case Hexagon::L4_loadrh_abs: + case Hexagon::L4_loadruh_abs: + ++shift; + // op: g16_0 + case Hexagon::L4_loadrb_abs: + case Hexagon::L4_loadrub_abs: { + // op: Rd + Value |= insn & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(511); + MI.addOperand(MCOperand::createImm(Value << shift)); + break; + } + + case Hexagon::L4_loadrd_abs: { + Value = insn & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(511); + MI.addOperand(MCOperand::createImm(Value << 3)); + break; + } + + case Hexagon::S2_storerdabs: { + // op: g16_3 + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << 3)); + // op: Rtt + Value = (insn >> 8) & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + break; + } + + // op: g16_2 + case Hexagon::S2_storerinewabs: + ++shift; + // op: g16_1 + case Hexagon::S2_storerhnewabs: + ++shift; + // op: g16_0 + case Hexagon::S2_storerbnewabs: { + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << shift)); + // op: Nt + Value = (insn >> 8) & UINT64_C(7); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + // op: g16_2 + case Hexagon::S2_storeriabs: + ++shift; + // op: g16_1 + case Hexagon::S2_storerhabs: + case Hexagon::S2_storerfabs: + ++shift; + // op: g16_0 + case Hexagon::S2_storerbabs: { + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << shift)); + // op: Rt + Value = (insn >> 8) & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + } + return MCDisassembler::Success; + } + return MCDisassembler::Fail; +} + +static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn, + void const *Decoder) { + + // Instruction Class for a constant extender: bits 31:28 = 0x0000 + if ((~insn & 0xf0000000) == 0xf0000000) { + unsigned Value; + // 27:16 High 12 bits of 26-bit extender. + Value = (insn & 0x0fff0000) << 4; + // 13:0 Low 14 bits of 26-bit extender.
+ Value |= ((insn & 0x3fff) << 6); + MI.setOpcode(Hexagon::A4_ext); + HexagonMCInstrInfo::addConstant(MI, Value, contextFromDecoder(Decoder)); + return MCDisassembler::Success; + } + return MCDisassembler::Fail; +} + // These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td enum subInstBinaryValues { V4_SA1_addi_BITS = 0x0000, @@ -731,6 +1342,8 @@ static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) { return Hexagon::R0 + encoded_reg; else if (encoded_reg < 16) return Hexagon::R0 + encoded_reg + 8; + + // patently false value return Hexagon::NoRegister; } @@ -739,10 +1352,13 @@ static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) { return Hexagon::D0 + encoded_dreg; else if (encoded_dreg < 8) return Hexagon::D0 + encoded_dreg + 4; + + // patently false value return Hexagon::NoRegister; } -static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { +void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, + unsigned inst) const { int64_t operand; MCOperand Op; switch (opcode) { @@ -762,8 +1378,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { case Hexagon::V4_SS2_allocframe: // u 8-4{5_3} operand = ((inst & 0x1f0) >> 4) << 3; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL1_loadri_io: // Rd 3-0, Rs 7-4, u 11-8{4_2} @@ -774,8 +1389,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0xf00) >> 6; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL1_loadrub_io: // Rd 3-0, Rs 7-4, u 11-8 @@ -786,8 +1400,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0xf00) >> 8; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL2_loadrb_io: // Rd 3-0, Rs 7-4, u 10-8 @@ -798,8 +1411,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0x700) >> 8; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL2_loadrh_io: case Hexagon::V4_SL2_loadruh_io: @@ -811,8 +1423,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x700) >> 8) << 1; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL2_loadrd_sp: // Rdd 2-0, u 7-3{5_3} @@ -820,8 +1431,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x0f8) >> 3) << 3; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL2_loadri_sp: // Rd 3-0, u 8-4{5_2} @@ -829,8 +1439,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x1f0) >> 4) << 2; - Op = MCOperand::createImm(operand); - 
MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_addi: // Rx 3-0 (x2), s7 10-4 @@ -839,8 +1448,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { MI->addOperand(Op); MI->addOperand(Op); operand = SignExtend64<7>((inst & 0x7f0) >> 4); - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_addrx: // Rx 3-0 (x2), Rs 7-4 @@ -873,8 +1481,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x3f0) >> 4) << 2; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_seti: // Rd 3-0, u 9-4 @@ -882,8 +1489,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0x3f0) >> 4; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_clrf: case Hexagon::V4_SA1_clrfnew: @@ -901,8 +1507,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = inst & 0x3; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_combine0i: case Hexagon::V4_SA1_combine1i: @@ -913,8 +1518,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0x060) >> 5; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_combinerz: case Hexagon::V4_SA1_combinezr: @@ -932,8 +1536,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0xf00) >> 8; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); @@ -944,8 +1547,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0xf00) >> 8) << 2; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); @@ -957,8 +1559,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = inst & 0xf; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SS2_storewi0: case Hexagon::V4_SS2_storewi1: @@ -967,25 +1568,23 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0xf) << 2; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SS2_stored_sp: // s 
8-3{6_3}, Rtt 2-0 operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3); - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getDRegFromSubinstEncoding(inst & 0x7); Op = MCOperand::createReg(operand); MI->addOperand(Op); + break; case Hexagon::V4_SS2_storeh_io: // Rs 7-4, u 10-8{3_1}, Rt 3-0 operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x700) >> 8) << 1; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); @@ -993,8 +1592,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { case Hexagon::V4_SS2_storew_sp: // u 8-4{5_2}, Rd 3-0 operand = ((inst & 0x1f0) >> 4) << 2; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h index d360be2aa5b2..ed7d9578902e 100644 --- a/contrib/llvm/lib/Target/Hexagon/Hexagon.h +++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h @@ -47,15 +47,8 @@ #include "llvm/Target/TargetMachine.h" namespace llvm { - class MachineInstr; - class MCInst; - class MCInstrInfo; - class HexagonAsmPrinter; class HexagonTargetMachine; - void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI, - HexagonAsmPrinter &AP); - /// \brief Creates a Hexagon-specific Target Transformation Info pass. ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM); } // end namespace llvm; diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td index 53a687c337ec..1189cfd488ee 100644 --- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td +++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td @@ -24,14 +24,32 @@ include "llvm/Target/Target.td" // Hexagon Architectures def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">; def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">; +def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Hexagon V55">; +def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Hexagon V60">; + +// Hexagon ISA Extensions +def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps", + "true", "Hexagon HVX instructions">; +def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", + "true", "Hexagon HVX Double instructions">; //===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. 
//===----------------------------------------------------------------------===// -def HasV5T : Predicate<"HST->hasV5TOps()">; -def NoV5T : Predicate<"!HST->hasV5TOps()">; -def UseMEMOP : Predicate<"HST->useMemOps()">; -def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; +def HasV5T : Predicate<"HST->hasV5TOps()">; +def NoV5T : Predicate<"!HST->hasV5TOps()">; +def HasV55T : Predicate<"HST->hasV55TOps()">, + AssemblerPredicate<"ArchV55">; +def HasV60T : Predicate<"HST->hasV60TOps()">, + AssemblerPredicate<"ArchV60">; +def UseMEMOP : Predicate<"HST->useMemOps()">; +def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; +def UseHVXDbl : Predicate<"HST->useHVXDblOps()">, + AssemblerPredicate<"ExtensionHVXDbl">; +def UseHVXSgl : Predicate<"HST->useHVXSglOps()">; + +def UseHVX : Predicate<"HST->useHVXSglOps() ||HST->useHVXDblOps()">, + AssemblerPredicate<"ExtensionHVX">; //===----------------------------------------------------------------------===// // Classes used for relation maps. @@ -53,6 +71,7 @@ class NewValueRel: PredNewRel; // NewValueRel - Filter class used to relate load/store instructions having // different addressing modes with each other. class AddrModeRel: NewValueRel; +class IntrinsicsRel; //===----------------------------------------------------------------------===// // Generate mapping table to relate non-predicate instructions with their @@ -62,7 +81,7 @@ class AddrModeRel: NewValueRel; def getPredOpcode : InstrMapping { let FilterClass = "PredRel"; // Instructions with the same BaseOpcode and isNVStore values form a row. - let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"]; + let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isNT"]; // Instructions with the same predicate sense form a column. let ColFields = ["PredSense"]; // The key column is the unpredicated instructions. 
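TableGen compiles InstrMapping definitions such as getPredOpcode into constant relation tables plus generated accessor functions. Below is a minimal, self-contained C++ sketch of the resulting row/column lookup; the opcode names, numbers, and table contents are made up for illustration and are not taken from the generated tables.

#include <map>
#include <string>
#include <utility>

// Hypothetical opcode numbers, standing in for TableGen-assigned values.
enum Opcode { A2_add_sk = 100, A2_add_pt_sk = 101, A2_add_pf_sk = 102,
              NoOpcode = -1 };

// Row key: the unpredicated (key-column) opcode. Column key: PredSense.
static const std::map<std::pair<Opcode, std::string>, Opcode> PredRel = {
    {{A2_add_sk, "true"},  A2_add_pt_sk},  // if (Pv) Rd = add(Rs,Rt)
    {{A2_add_sk, "false"}, A2_add_pf_sk},  // if (!Pv) Rd = add(Rs,Rt)
};

// Analogue of a generated getPredOpcode-style query.
int getPredOpcodeSketch(Opcode Key, const std::string &PredSense) {
  auto It = PredRel.find({Key, PredSense});
  return It == PredRel.end() ? NoOpcode : It->second;
}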
@@ -77,7 +96,7 @@ def getPredOpcode : InstrMapping { // def getFalsePredOpcode : InstrMapping { let FilterClass = "PredRel"; - let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"]; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"]; let ColFields = ["PredSense"]; let KeyCol = ["true"]; let ValueCols = [["false"]]; @@ -89,7 +108,7 @@ def getFalsePredOpcode : InstrMapping { // def getTruePredOpcode : InstrMapping { let FilterClass = "PredRel"; - let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"]; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"]; let ColFields = ["PredSense"]; let KeyCol = ["false"]; let ValueCols = [["true"]]; @@ -125,7 +144,7 @@ def getPredOldOpcode : InstrMapping { // def getNewValueOpcode : InstrMapping { let FilterClass = "NewValueRel"; - let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"]; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"]; let ColFields = ["NValueST"]; let KeyCol = ["false"]; let ValueCols = [["true"]]; @@ -137,16 +156,16 @@ def getNewValueOpcode : InstrMapping { // def getNonNVStore : InstrMapping { let FilterClass = "NewValueRel"; - let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"]; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"]; let ColFields = ["NValueST"]; let KeyCol = ["true"]; let ValueCols = [["false"]]; } -def getBasedWithImmOffset : InstrMapping { +def getBaseWithImmOffset : InstrMapping { let FilterClass = "AddrModeRel"; let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore", - "isMEMri", "isFloat"]; + "isFloat"]; let ColFields = ["addrMode"]; let KeyCol = ["Absolute"]; let ValueCols = [["BaseImmOffset"]]; @@ -168,6 +187,37 @@ def getRegForm : InstrMapping { let ValueCols = [["reg"]]; } +def getRegShlForm : InstrMapping { + let FilterClass = "ImmRegShl"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["InputType"]; + let KeyCol = ["imm"]; + let ValueCols = [["reg"]]; +} + +def notTakenBranchPrediction : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"]; + let ColFields = ["isBrTaken"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +def takenBranchPrediction : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"]; + let ColFields = ["isBrTaken"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +def getRealHWInstr : InstrMapping { + let FilterClass = "IntrinsicsRel"; + let RowFields = ["BaseOpcode"]; + let ColFields = ["InstrType"]; + let KeyCol = ["Pseudo"]; + let ValueCols = [["Pseudo"], ["Real"]]; +} //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions //===----------------------------------------------------------------------===// @@ -192,12 +242,22 @@ def : Proc<"hexagonv4", HexagonModelV4, [ArchV4]>; def : Proc<"hexagonv5", HexagonModelV4, [ArchV4, ArchV5]>; +def : Proc<"hexagonv55", HexagonModelV55, + [ArchV4, ArchV5, ArchV55]>; +def : Proc<"hexagonv60", HexagonModelV60, + [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing 
//===----------------------------------------------------------------------===// +def HexagonAsmParserVariant : AsmParserVariant { + int Variant = 0; + string TokenizingCharacters = "#()=:.<>!+*"; +} + def Hexagon : Target { // Pull in Instruction Info: let InstructionSet = HexagonInstrInfo; + let AssemblyParserVariants = [HexagonAsmParserVariant]; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 05728d2b627e..e213089687e8 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -40,11 +40,13 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" @@ -56,12 +58,27 @@ using namespace llvm; +namespace llvm { + void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP); +} + #define DEBUG_TYPE "asm-printer" static cl::opt<bool> AlignCalls( "hexagon-align-calls", cl::Hidden, cl::init(true), cl::desc("Insert falign after call instruction for Hexagon target")); +// Given a scalar register return its pair. +inline static unsigned getHexagonRegisterPair(unsigned Reg, + const MCRegisterInfo *RI) { + assert(Hexagon::IntRegsRegClass.contains(Reg)); + MCSuperRegIterator SR(Reg, RI, false); + unsigned Pair = *SR; + assert(Hexagon::DoubleRegsRegClass.contains(Pair)); + return Pair; +} + HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {} @@ -102,9 +119,8 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, // bool HexagonAsmPrinter:: isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { - if (MBB->hasAddressTaken()) { + if (MBB->hasAddressTaken()) return false; - } return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); } @@ -117,7 +133,8 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS) { // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) return true; // Unknown modifier. + if (ExtraCode[1] != 0) + return true; // Unknown modifier. switch (ExtraCode[0]) { default: @@ -173,45 +190,407 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI, + MCStreamer &OutStreamer, + const MCOperand &Imm, int AlignSize) { + MCSymbol *Sym; + int64_t Value; + if (Imm.getExpr()->evaluateAsAbsolute(Value)) { + StringRef sectionPrefix; + std::string ImmString; + StringRef Name; + if (AlignSize == 8) { + Name = ".CONST_0000000000000000"; + sectionPrefix = ".gnu.linkonce.l8"; + ImmString = utohexstr(Value); + } else { + Name = ".CONST_00000000"; + sectionPrefix = ".gnu.linkonce.l4"; + ImmString = utohexstr(static_cast<uint32_t>(Value)); + } + + std::string symbolName = // Yes, leading zeros are kept. 
+ Name.drop_back(ImmString.size()).str() + ImmString; + std::string sectionName = sectionPrefix.str() + symbolName; + + MCSectionELF *Section = OutStreamer.getContext().getELFSection( + sectionName, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + OutStreamer.SwitchSection(Section); + + Sym = AP.OutContext.getOrCreateSymbol(Twine(symbolName)); + if (Sym->isUndefined()) { + OutStreamer.EmitLabel(Sym); + OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); + OutStreamer.EmitIntValue(Value, AlignSize); + OutStreamer.EmitCodeAlignment(AlignSize); + } + } else { + assert(Imm.isExpr() && "Expected expression and found none"); + const MachineOperand &MO = MI.getOperand(1); + assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); + MCSymbol *MOSymbol = nullptr; + if (MO.isGlobal()) + MOSymbol = AP.getSymbol(MO.getGlobal()); + else if (MO.isCPI()) + MOSymbol = AP.GetCPISymbol(MO.getIndex()); + else if (MO.isJTI()) + MOSymbol = AP.GetJTISymbol(MO.getIndex()); + else + llvm_unreachable("Unknown operand type!"); + + StringRef SymbolName = MOSymbol->getName(); + std::string LitaName = ".CONST_" + SymbolName.str(); + + MCSectionELF *Section = OutStreamer.getContext().getELFSection( + ".lita", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + + OutStreamer.SwitchSection(Section); + Sym = AP.OutContext.getOrCreateSymbol(Twine(LitaName)); + if (Sym->isUndefined()) { + OutStreamer.EmitLabel(Sym); + OutStreamer.EmitSymbolAttribute(Sym, MCSA_Local); + OutStreamer.EmitValue(Imm.getExpr(), AlignSize); + OutStreamer.EmitCodeAlignment(AlignSize); + } + } + return Sym; +} + +void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, + const MachineInstr &MI) { + MCInst &MappedInst = static_cast <MCInst &>(Inst); + const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo(); + + switch (Inst.getOpcode()) { + default: return; + + // "$dst = CONST64(#$src1)", + case Hexagon::CONST64_Float_Real: + case Hexagon::CONST64_Int_Real: + if (!OutStreamer->hasRawTextSupport()) { + const MCOperand &Imm = MappedInst.getOperand(1); + MCSectionSubPair Current = OutStreamer->getCurrentSection(); + + MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 8); + + OutStreamer->SwitchSection(Current.first, Current.second); + MCInst TmpInst; + MCOperand &Reg = MappedInst.getOperand(0); + TmpInst.setOpcode(Hexagon::L2_loadrdgp); + TmpInst.addOperand(Reg); + TmpInst.addOperand(MCOperand::createExpr( + MCSymbolRefExpr::create(Sym, OutContext))); + MappedInst = TmpInst; + + } + break; + case Hexagon::CONST32: + case Hexagon::CONST32_Float_Real: + case Hexagon::CONST32_Int_Real: + case Hexagon::FCONST32_nsdata: + if (!OutStreamer->hasRawTextSupport()) { + MCOperand &Imm = MappedInst.getOperand(1); + MCSectionSubPair Current = OutStreamer->getCurrentSection(); + MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 4); + OutStreamer->SwitchSection(Current.first, Current.second); + MCInst TmpInst; + MCOperand &Reg = MappedInst.getOperand(0); + TmpInst.setOpcode(Hexagon::L2_loadrigp); + TmpInst.addOperand(Reg); + TmpInst.addOperand(MCOperand::createExpr( + MCSymbolRefExpr::create(Sym, OutContext))); + MappedInst = TmpInst; + } + break; + + // C2_pxfer_map maps to the C2_or instruction. It is possible to use + // C2_or during instruction selection itself, but doing so results + // in suboptimal code.
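The CONST64/CONST32 cases above lean on the smallData() helper defined earlier: each literal gets a section symbol named after its value at a fixed digit width (the drop_back splice keeps the leading zeros), so equal constants can be merged by the linker via the .gnu.linkonce sections. A standalone sketch of just that naming scheme, in plain C++ with no LLVM types; the C2_pxfer_map case that the comment above introduces continues right after it:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <string>

// Mirror of the Name.drop_back(ImmString.size()) + ImmString trick: start
// from a template of the full digit width and overwrite from the right.
std::string constSymbolName(uint64_t Value, int AlignSize) {
  const char *Template = (AlignSize == 8) ? ".CONST_0000000000000000"
                                          : ".CONST_00000000";
  char Hex[17];
  std::snprintf(Hex, sizeof(Hex), "%llX",
                AlignSize == 8 ? (unsigned long long)Value
                               : (unsigned long long)(uint32_t)Value);
  std::string Name(Template);
  std::string Digits(Hex);
  assert(Digits.size() + 7 <= Name.size()); // 7 == strlen(".CONST_")
  Name.replace(Name.size() - Digits.size(), Digits.size(), Digits);
  return Name; // e.g. constSymbolName(0x2A, 4) == ".CONST_0000002A"
}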
+ case Hexagon::C2_pxfer_map: { + MCOperand &Ps = Inst.getOperand(1); + MappedInst.setOpcode(Hexagon::C2_or); + MappedInst.addOperand(Ps); + return; + } + + // Vector reduce complex multiply by scalar, Rt & 1 map to :hi else :lo + // The insn is mapped from the 4 operand to the 3 operand raw form taking + // 3 register pairs. + case Hexagon::M2_vrcmpys_acc_s1: { + MCOperand &Rt = Inst.getOperand(3); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + case Hexagon::M2_vrcmpys_s1: { + MCOperand &Rt = Inst.getOperand(2); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + + case Hexagon::M2_vrcmpys_s1rp: { + MCOperand &Rt = Inst.getOperand(2); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + + case Hexagon::A4_boundscheck: { + MCOperand &Rs = Inst.getOperand(1); + assert (Rs.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rs.getReg()); + if (Reg & 1) // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2 + MappedInst.setOpcode(Hexagon::A4_boundscheck_hi); + else // raw:lo + MappedInst.setOpcode(Hexagon::A4_boundscheck_lo); + Rs.setReg(getHexagonRegisterPair(Rs.getReg(), RI)); + return; + } + case Hexagon::S5_asrhub_rnd_sat_goodsyntax: { + MCOperand &MO = MappedInst.getOperand(2); + int64_t Imm; + MCExpr const *Expr = MO.getExpr(); + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::S2_vsathub); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + MappedInst = TmpInst; + return; + } + TmpInst.setOpcode(Hexagon::S5_asrhub_rnd_sat); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + case Hexagon::S5_vasrhrnd_goodsyntax: + case Hexagon::S2_asr_i_p_rnd_goodsyntax: { + MCOperand &MO2 = MappedInst.getOperand(2); + MCExpr const *Expr = MO2.getExpr(); + int64_t Imm; + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(MappedInst.getOperand(0)); + MCOperand &MO1 = MappedInst.getOperand(1); + unsigned High = RI->getSubReg(MO1.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO1.getReg(), Hexagon::subreg_loreg); + // Add a new operand for the second register in the pair. 
+ TmpInst.addOperand(MCOperand::createReg(High)); + TmpInst.addOperand(MCOperand::createReg(Low)); + MappedInst = TmpInst; + return; + } + + if (Inst.getOpcode() == Hexagon::S2_asr_i_p_rnd_goodsyntax) + TmpInst.setOpcode(Hexagon::S2_asr_i_p_rnd); + else + TmpInst.setOpcode(Hexagon::S5_vasrhrnd); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + // if ("#u5==0") Assembler mapped to: "Rd=Rs"; else Rd=asr(Rs,#u5-1):rnd + case Hexagon::S2_asr_i_r_rnd_goodsyntax: { + MCOperand &MO = Inst.getOperand(2); + MCExpr const *Expr = MO.getExpr(); + int64_t Imm; + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::A2_tfr); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + MappedInst = TmpInst; + return; + } + TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + case Hexagon::TFRI_f: + MappedInst.setOpcode(Hexagon::A2_tfrsi); + return; + case Hexagon::TFRI_cPt_f: + MappedInst.setOpcode(Hexagon::C2_cmoveit); + return; + case Hexagon::TFRI_cNotPt_f: + MappedInst.setOpcode(Hexagon::C2_cmoveif); + return; + case Hexagon::MUX_ri_f: + MappedInst.setOpcode(Hexagon::C2_muxri); + return; + case Hexagon::MUX_ir_f: + MappedInst.setOpcode(Hexagon::C2_muxir); + return; + + // Translate a "$Rdd = #imm" to "$Rdd = combine(#[-1,0], #imm)" + case Hexagon::A2_tfrpi: { + MCInst TmpInst; + MCOperand &Rdd = MappedInst.getOperand(0); + MCOperand &MO = MappedInst.getOperand(1); + + TmpInst.setOpcode(Hexagon::A2_combineii); + TmpInst.addOperand(Rdd); + int64_t Imm; + bool Success = MO.getExpr()->evaluateAsAbsolute(Imm); + if (Success && Imm < 0) { + const MCExpr *MOne = MCConstantExpr::create(-1, OutContext); + TmpInst.addOperand(MCOperand::createExpr(MOne)); + } else { + const MCExpr *Zero = MCConstantExpr::create(0, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Zero)); + } + TmpInst.addOperand(MO); + MappedInst = TmpInst; + return; + } + // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" + case Hexagon::A2_tfrp: { + MCOperand &MO = MappedInst.getOperand(1); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode(Hexagon::A2_combinew); + return; + } + + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: { + MCOperand &MO = MappedInst.getOperand(2); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) + ? 
Hexagon::C2_ccombinewt + : Hexagon::C2_ccombinewf); + return; + } + case Hexagon::A2_tfrptnew: + case Hexagon::A2_tfrpfnew: { + MCOperand &MO = MappedInst.getOperand(2); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) + ? Hexagon::C2_ccombinewnewt + : Hexagon::C2_ccombinewnewf); + return; + } + + case Hexagon::M2_mpysmi: { + MCOperand &Imm = MappedInst.getOperand(2); + MCExpr const *Expr = Imm.getExpr(); + int64_t Value; + bool Success = Expr->evaluateAsAbsolute(Value); + assert(Success);(void)Success; + if (Value < 0 && Value > -256) { + MappedInst.setOpcode(Hexagon::M2_mpysin); + Imm.setExpr(MCUnaryExpr::createMinus(Expr, OutContext)); + } + else + MappedInst.setOpcode(Hexagon::M2_mpysip); + return; + } + + case Hexagon::A2_addsp: { + MCOperand &Rt = Inst.getOperand(1); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::A2_addsph); + else + MappedInst.setOpcode(Hexagon::A2_addspl); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + case Hexagon::HEXAGON_V6_vd0_pseudo: + case Hexagon::HEXAGON_V6_vd0_pseudo_128B: { + MCInst TmpInst; + assert (Inst.getOperand(0).isReg() && + "Expected register and none was found"); + + TmpInst.setOpcode(Hexagon::V6_vxor); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + MappedInst = TmpInst; + return; + } + + } +} + /// printMachineInstruction -- Print out a single Hexagon MI in Darwin syntax to /// the current output stream. /// void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { - MCInst MCB; - MCB.setOpcode(Hexagon::BUNDLE); - MCB.addOperand(MCOperand::createImm(0)); + MCInst MCB = HexagonMCInstrInfo::createBundle(); + const MCInstrInfo &MCII = *Subtarget->getInstrInfo(); if (MI->isBundle()) { const MachineBasicBlock* MBB = MI->getParent(); - MachineBasicBlock::const_instr_iterator MII = MI; + MachineBasicBlock::const_instr_iterator MII = MI->getIterator(); unsigned IgnoreCount = 0; - for (++MII; MII != MBB->end() && MII->isInsideBundle(); ++MII) { + for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII) if (MII->getOpcode() == TargetOpcode::DBG_VALUE || MII->getOpcode() == TargetOpcode::IMPLICIT_DEF) ++IgnoreCount; - else { - HexagonLowerToMC(MII, MCB, *this); - } - } + else + HexagonLowerToMC(MCII, &*MII, MCB, *this); } - else { - HexagonLowerToMC(MI, MCB, *this); - HexagonMCInstrInfo::padEndloop(MCB); - } - // Examine the packet and try to find instructions that can be converted - // to compounds. - HexagonMCInstrInfo::tryCompound(*Subtarget->getInstrInfo(), - OutStreamer->getContext(), MCB); - // Examine the packet and convert pairs of instructions to duplex - // instructions when possible. 
- SmallVector<DuplexCandidate, 8> possibleDuplexes; - possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties( - *Subtarget->getInstrInfo(), MCB); - HexagonMCShuffle(*Subtarget->getInstrInfo(), *Subtarget, - OutStreamer->getContext(), MCB, possibleDuplexes); - EmitToStreamer(*OutStreamer, MCB); + else + HexagonLowerToMC(MCII, MI, MCB, *this); + + bool Ok = HexagonMCInstrInfo::canonicalizePacket( + MCII, *Subtarget, OutStreamer->getContext(), MCB, nullptr); + assert(Ok); + (void)Ok; + if(HexagonMCInstrInfo::bundleSize(MCB) == 0) + return; + OutStreamer->EmitInstruction(MCB, getSubtargetInfo()); } extern "C" void LLVMInitializeHexagonAsmPrinter() { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h index 792fc8b7af3a..a78d97e28427 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -42,6 +42,10 @@ namespace llvm { void EmitInstruction(const MachineInstr *MI) override; + void HexagonProcessInstruction(MCInst &Inst, + const MachineInstr &MBB); + + void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp new file mode 100644 index 000000000000..77907b054d54 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -0,0 +1,2778 @@ +//===--- HexagonBitSimplify.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexbit" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "HexagonTargetMachine.h" +#include "HexagonBitTracker.h" + +using namespace llvm; + +namespace llvm { + void initializeHexagonBitSimplifyPass(PassRegistry& Registry); + FunctionPass *createHexagonBitSimplify(); +} + +namespace { + // Set of virtual registers, based on BitVector. 
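The RegisterSet that the comment above announces stores virtual registers in a BitVector keyed by a dense index. Here is a self-contained sketch of that v2x/x2v mapping, assuming the tag-bit numbering LLVM used for virtual registers at the time (the real code calls TargetRegisterInfo::virtReg2Index and index2VirtReg):

#include <cassert>
#include <vector>

static const unsigned VirtRegFlag = 1u << 31; // tag bit marking virtual regs

unsigned v2x(unsigned VReg) {                 // register number -> bit index
  assert(VReg & VirtRegFlag);
  return VReg & ~VirtRegFlag;
}
unsigned x2v(unsigned Index) {                // bit index -> register number
  return Index | VirtRegFlag;
}

struct SimpleRegSet {
  std::vector<bool> Bits;
  void insert(unsigned VReg) {
    unsigned Idx = v2x(VReg);
    if (Idx >= Bits.size())
      Bits.resize(Idx + 1);                   // grow on demand, like ensure()
    Bits[Idx] = true;
  }
  bool has(unsigned VReg) const {
    unsigned Idx = v2x(VReg);
    return Idx < Bits.size() && Bits[Idx];
  }
};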
+ struct RegisterSet : private BitVector { + RegisterSet() : BitVector() {} + explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} + RegisterSet(const RegisterSet &RS) : BitVector(RS) {} + + using BitVector::clear; + using BitVector::count; + + unsigned find_first() const { + int First = BitVector::find_first(); + if (First < 0) + return 0; + return x2v(First); + } + + unsigned find_next(unsigned Prev) const { + int Next = BitVector::find_next(v2x(Prev)); + if (Next < 0) + return 0; + return x2v(Next); + } + + RegisterSet &insert(unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return static_cast<RegisterSet&>(BitVector::set(Idx)); + } + RegisterSet &remove(unsigned R) { + unsigned Idx = v2x(R); + if (Idx >= size()) + return *this; + return static_cast<RegisterSet&>(BitVector::reset(Idx)); + } + + RegisterSet &insert(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); + } + RegisterSet &remove(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::reset(Rs)); + } + + reference operator[](unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return BitVector::operator[](Idx); + } + bool operator[](unsigned R) const { + unsigned Idx = v2x(R); + assert(Idx < size()); + return BitVector::operator[](Idx); + } + bool has(unsigned R) const { + unsigned Idx = v2x(R); + if (Idx >= size()) + return false; + return BitVector::test(Idx); + } + + bool empty() const { + return !BitVector::any(); + } + bool includes(const RegisterSet &Rs) const { + // A.BitVector::test(B) <=> A-B != {} + return !Rs.BitVector::test(*this); + } + bool intersects(const RegisterSet &Rs) const { + return BitVector::anyCommon(Rs); + } + + private: + void ensure(unsigned Idx) { + if (size() <= Idx) + resize(std::max(Idx+1, 32U)); + } + static inline unsigned v2x(unsigned v) { + return TargetRegisterInfo::virtReg2Index(v); + } + static inline unsigned x2v(unsigned x) { + return TargetRegisterInfo::index2VirtReg(x); + } + }; + + + struct PrintRegSet { + PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) + : RS(S), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, + const PrintRegSet &P); + private: + const RegisterSet &RS; + const TargetRegisterInfo *TRI; + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) + LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { + OS << '{'; + for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) + OS << ' ' << PrintReg(R, P.TRI); + OS << " }"; + return OS; + } +} + + +namespace { + class Transformation; + + class HexagonBitSimplify : public MachineFunctionPass { + public: + static char ID; + HexagonBitSimplify() : MachineFunctionPass(ID), MDT(0) { + initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "Hexagon bit simplification"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs); + static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses); + static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1, + const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W); + static bool isConst(const BitTracker::RegisterCell &RC, uint16_t B, + uint16_t W); + static bool 
isZero(const BitTracker::RegisterCell &RC, uint16_t B, + uint16_t W); + static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B, + uint16_t W, uint64_t &U); + static bool replaceReg(unsigned OldR, unsigned NewR, + MachineRegisterInfo &MRI); + static bool getSubregMask(const BitTracker::RegisterRef &RR, + unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI); + static bool replaceRegWithSub(unsigned OldR, unsigned NewR, + unsigned NewSR, MachineRegisterInfo &MRI); + static bool replaceSubWithSub(unsigned OldR, unsigned OldSR, + unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI); + static bool parseRegSequence(const MachineInstr &I, + BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH); + + static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits, + uint16_t Begin); + static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits, + uint16_t Begin, const HexagonInstrInfo &HII); + + static const TargetRegisterClass *getFinalVRegClass( + const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI); + static bool isTransparentCopy(const BitTracker::RegisterRef &RD, + const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI); + + private: + MachineDominatorTree *MDT; + + bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs); + }; + + char HexagonBitSimplify::ID = 0; + typedef HexagonBitSimplify HBS; + + + // The purpose of this class is to provide a common facility to traverse + // the function top-down or bottom-up via the dominator tree, and keep + // track of the available registers. + class Transformation { + public: + bool TopDown; + Transformation(bool TD) : TopDown(TD) {} + virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0; + virtual ~Transformation() {} + }; +} + +INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit", + "Hexagon bit simplification", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit", + "Hexagon bit simplification", false, false) + + +bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T, + RegisterSet &AVs) { + MachineDomTreeNode *N = MDT->getNode(&B); + typedef GraphTraits<MachineDomTreeNode*> GTN; + bool Changed = false; + + if (T.TopDown) + Changed = T.processBlock(B, AVs); + + RegisterSet Defs; + for (auto &I : B) + getInstrDefs(I, Defs); + RegisterSet NewAVs = AVs; + NewAVs.insert(Defs); + + for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + Changed |= visitBlock(*SB, T, NewAVs); + } + if (!T.TopDown) + Changed |= T.processBlock(B, AVs); + + return Changed; +} + +// +// Utility functions: +// +void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI, + RegisterSet &Defs) { + for (auto &Op : MI.operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Defs.insert(R); + } +} + +void HexagonBitSimplify::getInstrUses(const MachineInstr &MI, + RegisterSet &Uses) { + for (auto &Op : MI.operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Uses.insert(R); + } +} + +// Check if all the bits in range [B, E) in both cells are equal. 
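visitBlock above is the pass's generic driver: a depth-first walk of the dominator tree that hands each Transformation the set of definitions available on entry to a block, extending that set with the block's own defs only for the children. A minimal sketch with hypothetical Block and callback types; the isEqual helper announced by the comment above follows right after this sketch:

#include <functional>
#include <set>
#include <vector>

struct Block {
  std::set<unsigned> Defs;          // registers defined in this block
  std::vector<Block *> DomChildren; // children in the dominator tree
};

using ProcessFn = std::function<bool(Block &, const std::set<unsigned> &)>;

bool visit(Block &B, bool TopDown, const ProcessFn &Process,
           const std::set<unsigned> &Avail) {
  bool Changed = false;
  if (TopDown)
    Changed |= Process(B, Avail);   // parent first, then children
  std::set<unsigned> NewAvail = Avail;
  NewAvail.insert(B.Defs.begin(), B.Defs.end()); // B's defs dominate children
  for (Block *C : B.DomChildren)
    Changed |= visit(*C, TopDown, Process, NewAvail);
  if (!TopDown)
    Changed |= Process(B, Avail);   // children first, then parent
  return Changed;
}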
+bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1, + uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2, + uint16_t W) { + for (uint16_t i = 0; i < W; ++i) { + // If RC1[i] is "bottom", it cannot be proven equal to RC2[i]. + if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0) + return false; + // Same for RC2[i]. + if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0) + return false; + if (RC1[B1+i] != RC2[B2+i]) + return false; + } + return true; +} + + +bool HexagonBitSimplify::isConst(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W) { + assert(B < RC.width() && B+W <= RC.width()); + for (uint16_t i = B; i < B+W; ++i) + if (!RC[i].num()) + return false; + return true; +} + + +bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W) { + assert(B < RC.width() && B+W <= RC.width()); + for (uint16_t i = B; i < B+W; ++i) + if (!RC[i].is(0)) + return false; + return true; +} + + +bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W, uint64_t &U) { + assert(B < RC.width() && B+W <= RC.width()); + int64_t T = 0; + for (uint16_t i = B+W; i > B; --i) { + const BitTracker::BitValue &BV = RC[i-1]; + T <<= 1; + if (BV.is(1)) + T |= 1; + else if (!BV.is(0)) + return false; + } + U = T; + return true; +} + + +bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, + MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + I->setReg(NewR); + } + return Begin != End; +} + + +bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, + unsigned NewSR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + I->setReg(NewR); + I->setSubReg(NewSR); + } + return Begin != End; +} + + +bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR, + unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + if (I->getSubReg() != OldSR) + continue; + I->setReg(NewR); + I->setSubReg(NewSR); + } + return Begin != End; +} + + +// For a register ref (pair Reg:Sub), set Begin to the position of the LSB +// of Sub in Reg, and set Width to the size of Sub in bits. Return true, +// if this succeeded, otherwise return false. +bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, + unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) { + const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); + if (RC == &Hexagon::IntRegsRegClass) { + assert(RR.Sub == 0); + Begin = 0; + Width = 32; + return true; + } + if (RC == &Hexagon::DoubleRegsRegClass) { + if (RR.Sub == 0) { + Begin = 0; + Width = 64; + return true; + } + assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg); + Width = 32; + Begin = (RR.Sub == Hexagon::subreg_loreg ? 
0 : 32); + return true; + } + return false; +} + + +// For a REG_SEQUENCE, set SL to the low subregister and SH to the high +// subregister. +bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I, + BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) { + assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE); + unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm(); + assert(Sub1 != Sub2); + if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) { + SL = I.getOperand(1); + SH = I.getOperand(3); + return true; + } + if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) { + SH = I.getOperand(1); + SL = I.getOperand(3); + return true; + } + return false; +} + + +// All stores (except 64-bit stores) take a 32-bit register as the source +// of the value to be stored. If the instruction stores into a location +// that is shorter than 32 bits, some bits of the source register are not +// used. For each store instruction, calculate the set of used bits in +// the source register, and set appropriate bits in Bits. Return true if +// the bits are calculated, false otherwise. +bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits, + uint16_t Begin) { + using namespace Hexagon; + + switch (Opc) { + // Store byte + case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32 + case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new + case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32 + case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32 + case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32 + case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32 + case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new + case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new + case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new + case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new + case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32 + case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new + case S4_storerb_ap: // memb(Re32=#U6)=Rt32 + case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new + case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32 + case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new + case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32 + case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new + case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32 + case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new + case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32 + case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new + case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32 + case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new + case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32 + case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbfnew_rr: // if (!Pv4.new) 
memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new + case S2_storerbgp: // memb(gp+#u16:0)=Rt32 + case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new + case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32 + case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32 + case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32 + case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32 + case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new + case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new + case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new + case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new + Bits.set(Begin, Begin+8); + return true; + + // Store low half + case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32 + case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new + case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32 + case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32 + case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32 + case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32 + case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new + case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new + case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new + case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new + case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32 + case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new + case S4_storerh_ap: // memh(Re32=#U6)=Rt32 + case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new + case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32 + case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new + case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32 + case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new + case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32 + case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new + case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32 + case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new + case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32 + case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new + case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new + case S2_storerhgp: // memh(gp+#u16:1)=Rt32 
+ case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new + case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32 + case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32 + case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32 + case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32 + case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new + case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new + case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new + case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new + Bits.set(Begin, Begin+16); + return true; + + // Store high half + case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32 + case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32 + case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32 + case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32 + case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32 + case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32 + case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32 + case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32 + case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32 + case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32 + case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32 + case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32 + case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32 + case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32 + case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32 + case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32 + case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32 + Bits.set(Begin+16, Begin+32); + return true; + } + + return false; +} + + +// For an instruction with opcode Opc, calculate the set of bits that it +// uses in a register in operand OpN. This only calculates the set of used +// bits for cases where it does not depend on any operands (as is the case +// in shifts, for example). For concrete instructions from a program, the +// operand may be a subregister of a larger register, while Bits would +// correspond to the larger register in its entirety. Because of that, +// the parameter Begin can be used to indicate which bit of Bits should be +// considered the LSB of the operand. +bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, + BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) { + using namespace Hexagon; + + const MCInstrDesc &D = HII.get(Opc); + if (D.mayStore()) { + if (OpN == D.getNumOperands()-1) + return getUsedBitsInStore(Opc, Bits, Begin); + return false; + } + + switch (Opc) { + // One register source. Used bits: R1[0-7]. + case A2_sxtb: + case A2_zxtb: + case A4_cmpbeqi: + case A4_cmpbgti: + case A4_cmpbgtui: + if (OpN == 1) { + Bits.set(Begin, Begin+8); + return true; + } + break; + + // One register source. Used bits: R1[0-15].
+ case A2_aslh: + case A2_sxth: + case A2_zxth: + case A4_cmpheqi: + case A4_cmphgti: + case A4_cmphgtui: + if (OpN == 1) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // One register source. Used bits: R1[16-31]. + case A2_asrh: + if (OpN == 1) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + + // Two register sources. Used bits: R1[0-7], R2[0-7]. + case A4_cmpbeq: + case A4_cmpbgt: + case A4_cmpbgtu: + if (OpN == 1) { + Bits.set(Begin, Begin+8); + return true; + } + break; + + // Two register sources. Used bits: R1[0-15], R2[0-15]. + case A4_cmpheq: + case A4_cmphgt: + case A4_cmphgtu: + case A2_addh_h16_ll: + case A2_addh_h16_sat_ll: + case A2_addh_l16_ll: + case A2_addh_l16_sat_ll: + case A2_combine_ll: + case A2_subh_h16_ll: + case A2_subh_h16_sat_ll: + case A2_subh_l16_ll: + case A2_subh_l16_sat_ll: + case M2_mpy_acc_ll_s0: + case M2_mpy_acc_ll_s1: + case M2_mpy_acc_sat_ll_s0: + case M2_mpy_acc_sat_ll_s1: + case M2_mpy_ll_s0: + case M2_mpy_ll_s1: + case M2_mpy_nac_ll_s0: + case M2_mpy_nac_ll_s1: + case M2_mpy_nac_sat_ll_s0: + case M2_mpy_nac_sat_ll_s1: + case M2_mpy_rnd_ll_s0: + case M2_mpy_rnd_ll_s1: + case M2_mpy_sat_ll_s0: + case M2_mpy_sat_ll_s1: + case M2_mpy_sat_rnd_ll_s0: + case M2_mpy_sat_rnd_ll_s1: + case M2_mpyd_acc_ll_s0: + case M2_mpyd_acc_ll_s1: + case M2_mpyd_ll_s0: + case M2_mpyd_ll_s1: + case M2_mpyd_nac_ll_s0: + case M2_mpyd_nac_ll_s1: + case M2_mpyd_rnd_ll_s0: + case M2_mpyd_rnd_ll_s1: + case M2_mpyu_acc_ll_s0: + case M2_mpyu_acc_ll_s1: + case M2_mpyu_ll_s0: + case M2_mpyu_ll_s1: + case M2_mpyu_nac_ll_s0: + case M2_mpyu_nac_ll_s1: + case M2_mpyud_acc_ll_s0: + case M2_mpyud_acc_ll_s1: + case M2_mpyud_ll_s0: + case M2_mpyud_ll_s1: + case M2_mpyud_nac_ll_s0: + case M2_mpyud_nac_ll_s1: + if (OpN == 1 || OpN == 2) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // Two register sources. Used bits: R1[0-15], R2[16-31]. + case A2_addh_h16_lh: + case A2_addh_h16_sat_lh: + case A2_combine_lh: + case A2_subh_h16_lh: + case A2_subh_h16_sat_lh: + case M2_mpy_acc_lh_s0: + case M2_mpy_acc_lh_s1: + case M2_mpy_acc_sat_lh_s0: + case M2_mpy_acc_sat_lh_s1: + case M2_mpy_lh_s0: + case M2_mpy_lh_s1: + case M2_mpy_nac_lh_s0: + case M2_mpy_nac_lh_s1: + case M2_mpy_nac_sat_lh_s0: + case M2_mpy_nac_sat_lh_s1: + case M2_mpy_rnd_lh_s0: + case M2_mpy_rnd_lh_s1: + case M2_mpy_sat_lh_s0: + case M2_mpy_sat_lh_s1: + case M2_mpy_sat_rnd_lh_s0: + case M2_mpy_sat_rnd_lh_s1: + case M2_mpyd_acc_lh_s0: + case M2_mpyd_acc_lh_s1: + case M2_mpyd_lh_s0: + case M2_mpyd_lh_s1: + case M2_mpyd_nac_lh_s0: + case M2_mpyd_nac_lh_s1: + case M2_mpyd_rnd_lh_s0: + case M2_mpyd_rnd_lh_s1: + case M2_mpyu_acc_lh_s0: + case M2_mpyu_acc_lh_s1: + case M2_mpyu_lh_s0: + case M2_mpyu_lh_s1: + case M2_mpyu_nac_lh_s0: + case M2_mpyu_nac_lh_s1: + case M2_mpyud_acc_lh_s0: + case M2_mpyud_acc_lh_s1: + case M2_mpyud_lh_s0: + case M2_mpyud_lh_s1: + case M2_mpyud_nac_lh_s0: + case M2_mpyud_nac_lh_s1: + // These four are actually LH. + case A2_addh_l16_hl: + case A2_addh_l16_sat_hl: + case A2_subh_l16_hl: + case A2_subh_l16_sat_hl: + if (OpN == 1) { + Bits.set(Begin, Begin+16); + return true; + } + if (OpN == 2) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + + // Two register sources, used bits: R1[16-31], R2[0-15]. 
+ case A2_addh_h16_hl: + case A2_addh_h16_sat_hl: + case A2_combine_hl: + case A2_subh_h16_hl: + case A2_subh_h16_sat_hl: + case M2_mpy_acc_hl_s0: + case M2_mpy_acc_hl_s1: + case M2_mpy_acc_sat_hl_s0: + case M2_mpy_acc_sat_hl_s1: + case M2_mpy_hl_s0: + case M2_mpy_hl_s1: + case M2_mpy_nac_hl_s0: + case M2_mpy_nac_hl_s1: + case M2_mpy_nac_sat_hl_s0: + case M2_mpy_nac_sat_hl_s1: + case M2_mpy_rnd_hl_s0: + case M2_mpy_rnd_hl_s1: + case M2_mpy_sat_hl_s0: + case M2_mpy_sat_hl_s1: + case M2_mpy_sat_rnd_hl_s0: + case M2_mpy_sat_rnd_hl_s1: + case M2_mpyd_acc_hl_s0: + case M2_mpyd_acc_hl_s1: + case M2_mpyd_hl_s0: + case M2_mpyd_hl_s1: + case M2_mpyd_nac_hl_s0: + case M2_mpyd_nac_hl_s1: + case M2_mpyd_rnd_hl_s0: + case M2_mpyd_rnd_hl_s1: + case M2_mpyu_acc_hl_s0: + case M2_mpyu_acc_hl_s1: + case M2_mpyu_hl_s0: + case M2_mpyu_hl_s1: + case M2_mpyu_nac_hl_s0: + case M2_mpyu_nac_hl_s1: + case M2_mpyud_acc_hl_s0: + case M2_mpyud_acc_hl_s1: + case M2_mpyud_hl_s0: + case M2_mpyud_hl_s1: + case M2_mpyud_nac_hl_s0: + case M2_mpyud_nac_hl_s1: + if (OpN == 1) { + Bits.set(Begin+16, Begin+32); + return true; + } + if (OpN == 2) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // Two register sources, used bits: R1[16-31], R2[16-31]. + case A2_addh_h16_hh: + case A2_addh_h16_sat_hh: + case A2_combine_hh: + case A2_subh_h16_hh: + case A2_subh_h16_sat_hh: + case M2_mpy_acc_hh_s0: + case M2_mpy_acc_hh_s1: + case M2_mpy_acc_sat_hh_s0: + case M2_mpy_acc_sat_hh_s1: + case M2_mpy_hh_s0: + case M2_mpy_hh_s1: + case M2_mpy_nac_hh_s0: + case M2_mpy_nac_hh_s1: + case M2_mpy_nac_sat_hh_s0: + case M2_mpy_nac_sat_hh_s1: + case M2_mpy_rnd_hh_s0: + case M2_mpy_rnd_hh_s1: + case M2_mpy_sat_hh_s0: + case M2_mpy_sat_hh_s1: + case M2_mpy_sat_rnd_hh_s0: + case M2_mpy_sat_rnd_hh_s1: + case M2_mpyd_acc_hh_s0: + case M2_mpyd_acc_hh_s1: + case M2_mpyd_hh_s0: + case M2_mpyd_hh_s1: + case M2_mpyd_nac_hh_s0: + case M2_mpyd_nac_hh_s1: + case M2_mpyd_rnd_hh_s0: + case M2_mpyd_rnd_hh_s1: + case M2_mpyu_acc_hh_s0: + case M2_mpyu_acc_hh_s1: + case M2_mpyu_hh_s0: + case M2_mpyu_hh_s1: + case M2_mpyu_nac_hh_s0: + case M2_mpyu_nac_hh_s1: + case M2_mpyud_acc_hh_s0: + case M2_mpyud_acc_hh_s1: + case M2_mpyud_hh_s0: + case M2_mpyud_hh_s1: + case M2_mpyud_nac_hh_s0: + case M2_mpyud_nac_hh_s1: + if (OpN == 1 || OpN == 2) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + } + + return false; +} + + +// Calculate the register class that matches Reg:Sub. For example, if +// vreg1 is a double register, then vreg1:subreg_hireg would match the "int" +// register class. +const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( + const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return nullptr; + auto *RC = MRI.getRegClass(RR.Reg); + if (RR.Sub == 0) + return RC; + + auto VerifySR = [] (unsigned Sub) -> void { + assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg); + }; + + switch (RC->getID()) { + case Hexagon::DoubleRegsRegClassID: + VerifySR(RR.Sub); + return &Hexagon::IntRegsRegClass; + } + return nullptr; +} + + +// Check if RD could be replaced with RS at any possible use of RD. +// For example, a predicate register cannot be replaced with an integer +// register, but a 64-bit register with a subregister can be replaced +// with a 32-bit register.
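Every _ll/_lh/_hl/_hh group above follows one rule: the two suffix letters name which 16-bit half of the first and second source register the instruction reads. Here is a condensed standalone restatement of that classification (illustrative enum, not the pass's interface); isTransparentCopy, which the comment above introduces, follows:

#include <bitset>

enum HalfForm { LL, LH, HL, HH }; // suffix of e.g. M2_mpy_hl_s0

// Mark the bits of source operand OpN (1 or 2) that are read, relative to
// bit position Begin of the containing register cell.
void setUsedHalf(HalfForm F, unsigned OpN, unsigned Begin,
                 std::bitset<64> &Bits) {
  bool High = (OpN == 1) ? (F == HL || F == HH)  // first letter covers R1
                         : (F == LH || F == HH); // second letter covers R2
  unsigned B = Begin + (High ? 16 : 0);
  for (unsigned i = B; i < B + 16; ++i)
    Bits.set(i); // e.g. HL with OpN==1 -> bits [Begin+16, Begin+32)
}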
+bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD, + const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) || + !TargetRegisterInfo::isVirtualRegister(RS.Reg)) + return false; + // Return false if one (or both) classes are nullptr. + auto *DRC = getFinalVRegClass(RD, MRI); + if (!DRC) + return false; + + return DRC == getFinalVRegClass(RS, MRI); +} + + +// +// Dead code elimination +// +namespace { + class DeadCodeElimination { + public: + DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt) + : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()), + MDT(mdt), MRI(mf.getRegInfo()) {} + + bool run() { + return runOnNode(MDT.getRootNode()); + } + + private: + bool isDead(unsigned R) const; + bool runOnNode(MachineDomTreeNode *N); + + MachineFunction &MF; + const HexagonInstrInfo &HII; + MachineDominatorTree &MDT; + MachineRegisterInfo &MRI; + }; +} + + +bool DeadCodeElimination::isDead(unsigned R) const { + for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) { + MachineInstr *UseI = I->getParent(); + if (UseI->isDebugValue()) + continue; + if (UseI->isPHI()) { + assert(!UseI->getOperand(0).getSubReg()); + unsigned DR = UseI->getOperand(0).getReg(); + if (DR == R) + continue; + } + return false; + } + return true; +} + + +bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { + bool Changed = false; + typedef GraphTraits<MachineDomTreeNode*> GTN; + for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) + Changed |= runOnNode(*I); + + MachineBasicBlock *B = N->getBlock(); + std::vector<MachineInstr*> Instrs; + for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) + Instrs.push_back(&*I); + + for (auto MI : Instrs) { + unsigned Opc = MI->getOpcode(); + // Do not touch lifetime markers. This is why the target-independent DCE + // cannot be used. + if (Opc == TargetOpcode::LIFETIME_START || + Opc == TargetOpcode::LIFETIME_END) + continue; + bool Store = false; + if (MI->isInlineAsm()) + continue; + // Delete PHIs if possible. + if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store)) + continue; + + bool AllDead = true; + SmallVector<unsigned,2> Regs; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) { + AllDead = false; + break; + } + Regs.push_back(R); + } + if (!AllDead) + continue; + + B->erase(MI); + for (unsigned i = 0, n = Regs.size(); i != n; ++i) + MRI.markUsesInDebugValueAsUndef(Regs[i]); + Changed = true; + } + + return Changed; +} + + +// +// Eliminate redundant instructions +// +// This transformation will identify instructions where the output register +// is the same as one of its input registers. This only works on instructions +// that define a single register (unlike post-increment loads, for example). +// The equality check is actually more detailed: the code calculates which +// bits of the output are used, and only compares these bits with the input +// registers. +// If the output matches an input, the instruction is replaced with COPY. +// The copies will be removed by another transformation. 
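+// For example (hypothetical vregs): in
+//   vreg2 = A2_zxth vreg1
+// if every user of vreg2 only reads bits [0-15], those bits are identical
+// to vreg1[0-15], so the definition of vreg2 can be rewritten as a COPY
+// of vreg1.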
+namespace { + class RedundantInstrElimination : public Transformation { + public: + RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN, + unsigned &LostB, unsigned &LostE); + bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN, + unsigned &LostB, unsigned &LostE); + bool computeUsedBits(unsigned Reg, BitVector &Bits); + bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits, + uint16_t Begin); + bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + + +// Check if the instruction is a lossy shift left, where the input being +// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range +// of bit indices that are lost. +bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI, + unsigned OpN, unsigned &LostB, unsigned &LostE) { + using namespace Hexagon; + unsigned Opc = MI.getOpcode(); + unsigned ImN, RegN, Width; + switch (Opc) { + case S2_asl_i_p: + ImN = 2; + RegN = 1; + Width = 64; + break; + case S2_asl_i_p_acc: + case S2_asl_i_p_and: + case S2_asl_i_p_nac: + case S2_asl_i_p_or: + case S2_asl_i_p_xacc: + ImN = 3; + RegN = 2; + Width = 64; + break; + case S2_asl_i_r: + ImN = 2; + RegN = 1; + Width = 32; + break; + case S2_addasl_rrri: + case S4_andi_asl_ri: + case S4_ori_asl_ri: + case S4_addi_asl_ri: + case S4_subi_asl_ri: + case S2_asl_i_r_acc: + case S2_asl_i_r_and: + case S2_asl_i_r_nac: + case S2_asl_i_r_or: + case S2_asl_i_r_sat: + case S2_asl_i_r_xacc: + ImN = 3; + RegN = 2; + Width = 32; + break; + default: + return false; + } + + if (RegN != OpN) + return false; + + assert(MI.getOperand(ImN).isImm()); + unsigned S = MI.getOperand(ImN).getImm(); + if (S == 0) + return false; + LostB = Width-S; + LostE = Width; + return true; +} + + +// Check if the instruction is a lossy shift right, where the input being +// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range +// of bit indices that are lost. +bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI, + unsigned OpN, unsigned &LostB, unsigned &LostE) { + using namespace Hexagon; + unsigned Opc = MI.getOpcode(); + unsigned ImN, RegN; + switch (Opc) { + case S2_asr_i_p: + case S2_lsr_i_p: + ImN = 2; + RegN = 1; + break; + case S2_asr_i_p_acc: + case S2_asr_i_p_and: + case S2_asr_i_p_nac: + case S2_asr_i_p_or: + case S2_lsr_i_p_acc: + case S2_lsr_i_p_and: + case S2_lsr_i_p_nac: + case S2_lsr_i_p_or: + case S2_lsr_i_p_xacc: + ImN = 3; + RegN = 2; + break; + case S2_asr_i_r: + case S2_lsr_i_r: + ImN = 2; + RegN = 1; + break; + case S4_andi_lsr_ri: + case S4_ori_lsr_ri: + case S4_addi_lsr_ri: + case S4_subi_lsr_ri: + case S2_asr_i_r_acc: + case S2_asr_i_r_and: + case S2_asr_i_r_nac: + case S2_asr_i_r_or: + case S2_lsr_i_r_acc: + case S2_lsr_i_r_and: + case S2_lsr_i_r_nac: + case S2_lsr_i_r_or: + case S2_lsr_i_r_xacc: + ImN = 3; + RegN = 2; + break; + + default: + return false; + } + + if (RegN != OpN) + return false; + + assert(MI.getOperand(ImN).isImm()); + unsigned S = MI.getOperand(ImN).getImm(); + LostB = 0; + LostE = S; + return true; +} + + +// Calculate the bit vector that corresponds to the used bits of register Reg. 
+// The vector Bits has the same size as the size of Reg in bits. If the cal-
+// culation fails (i.e. the used bits are unknown), it returns false. Other-
+// wise, it returns true and sets the corresponding bits in Bits.
+bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
+ BitVector Used(Bits.size());
+ RegisterSet Visited;
+ std::vector<unsigned> Pending;
+ Pending.push_back(Reg);
+
+ for (unsigned i = 0; i < Pending.size(); ++i) {
+ unsigned R = Pending[i];
+ if (Visited.has(R))
+ continue;
+ Visited.insert(R);
+ for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+ BitTracker::RegisterRef UR = *I;
+ unsigned B, W;
+ if (!HBS::getSubregMask(UR, B, W, MRI))
+ return false;
+ MachineInstr &UseI = *I->getParent();
+ if (UseI.isPHI() || UseI.isCopy()) {
+ unsigned DefR = UseI.getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DefR))
+ return false;
+ Pending.push_back(DefR);
+ } else {
+ if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
+ return false;
+ }
+ }
+ }
+ Bits |= Used;
+ return true;
+}
+
+
+// Calculate the bits used by instruction MI in a register in operand OpN.
+// Return true/false if the calculation succeeds/fails. If it succeeds, set
+// used bits in Bits. This function does not reset any bits in Bits, so
+// subsequent calls over different instructions will result in the union
+// of the used bits in all these instructions.
+// The register in question may be used with a sub-register, whereas Bits
+// holds the bits for the entire register. To keep track of that, the
+// argument Begin indicates where in Bits is the least-significant bit
+// of the register used in operand OpN. For example, in instruction:
+// vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10
+// the operand 1 is a 32-bit register, which happens to be a subregister
+// of the 64-bit register vreg2, and that subregister starts at position 32.
+// In this case Begin=32, since Bits[32] would be the least-significant bit
+// of vreg2:subreg_hireg.
+bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
+ unsigned OpN, BitVector &Bits, uint16_t Begin) {
+ unsigned Opc = MI.getOpcode();
+ BitVector T(Bits.size());
+ bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
+ // Even if we don't have bits yet, we could still provide some information
+ // if the instruction is a lossy shift: the lost bits will be marked as
+ // not used.
+ unsigned LB, LE;
+ if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
+ assert(MI.getOperand(OpN).isReg());
+ BitTracker::RegisterRef RR = MI.getOperand(OpN);
+ const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
+ uint16_t Width = RC->getSize()*8;
+
+ if (!GotBits)
+ T.set(Begin, Begin+Width);
+ assert(LB <= LE && LB < Width && LE <= Width);
+ T.reset(Begin+LB, Begin+LE);
+ GotBits = true;
+ }
+ if (GotBits)
+ Bits |= T;
+ return GotBits;
+}
+
+
+// Calculates the used bits in RD ("defined register"), and checks if these
+// bits in RS ("used register") and RD are identical.
+bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD, + BitTracker::RegisterRef RS) { + const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + + unsigned DB, DW; + if (!HBS::getSubregMask(RD, DB, DW, MRI)) + return false; + unsigned SB, SW; + if (!HBS::getSubregMask(RS, SB, SW, MRI)) + return false; + if (SW != DW) + return false; + + BitVector Used(DC.width()); + if (!computeUsedBits(RD.Reg, Used)) + return false; + + for (unsigned i = 0; i != DW; ++i) + if (Used[i+DB] && DC[DB+i] != SC[SB+i]) + return false; + return true; +} + + +bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, + const RegisterSet&) { + bool Changed = false; + + for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) { + NextI = std::next(I); + MachineInstr *MI = &*I; + + if (MI->getOpcode() == TargetOpcode::COPY) + continue; + if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) + continue; + unsigned NumD = MI->getDesc().getNumDefs(); + if (NumD != 1) + continue; + + BitTracker::RegisterRef RD = MI->getOperand(0); + if (!BT.has(RD.Reg)) + continue; + const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); + + // Find a source operand that is equal to the result. + for (auto &Op : MI->uses()) { + if (!Op.isReg()) + continue; + BitTracker::RegisterRef RS = Op; + if (!BT.has(RS.Reg)) + continue; + if (!HBS::isTransparentCopy(RD, RS, MRI)) + continue; + + unsigned BN, BW; + if (!HBS::getSubregMask(RS, BN, BW, MRI)) + continue; + + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW)) + continue; + + // If found, replace the instruction with a COPY. + DebugLoc DL = MI->getDebugLoc(); + const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); + unsigned NewR = MRI.createVirtualRegister(FRC); + BuildMI(B, I, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(RS.Reg, 0, RS.Sub); + HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); + BT.put(BitTracker::RegisterRef(NewR), SC); + Changed = true; + break; + } + } + + return Changed; +} + + +// +// Const generation +// +// Recognize instructions that produce constant values known at compile-time. +// Replace them with register definitions that load these constants directly. 
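+// For example (hypothetical vregs): if the bit tracker proves that every
+// bit of
+//   vreg3 = A2_and vreg1, vreg2
+// is a known constant, say the value 255, the definition is replaced with
+//   vreg4 = A2_tfrsi #255
+// and the uses of vreg3 are rewritten to use vreg4.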
+namespace { + class ConstGeneration : public Transformation { + public: + ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + bool isTfrConst(const MachineInstr *MI) const; + bool isConst(unsigned R, int64_t &V) const; + unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C, + MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + +bool ConstGeneration::isConst(unsigned R, int64_t &C) const { + if (!BT.has(R)) + return false; + const BitTracker::RegisterCell &RC = BT.lookup(R); + int64_t T = 0; + for (unsigned i = RC.width(); i > 0; --i) { + const BitTracker::BitValue &V = RC[i-1]; + T <<= 1; + if (V.is(1)) + T |= 1; + else if (!V.is(0)) + return false; + } + C = T; + return true; +} + + +bool ConstGeneration::isTfrConst(const MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + case Hexagon::A2_tfrsi: + case Hexagon::A2_tfrpi: + case Hexagon::TFR_PdTrue: + case Hexagon::TFR_PdFalse: + case Hexagon::CONST32_Int_Real: + case Hexagon::CONST64_Int_Real: + return true; + } + return false; +} + + +// Generate a transfer-immediate instruction that is appropriate for the +// register class and the actual value being transferred. +unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, + MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) { + unsigned Reg = MRI.createVirtualRegister(RC); + if (RC == &Hexagon::IntRegsRegClass) { + BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg) + .addImm(int32_t(C)); + return Reg; + } + + if (RC == &Hexagon::DoubleRegsRegClass) { + if (isInt<8>(C)) { + BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg) + .addImm(C); + return Reg; + } + + unsigned Lo = Lo_32(C), Hi = Hi_32(C); + if (isInt<8>(Lo) || isInt<8>(Hi)) { + unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii + : Hexagon::A4_combineii; + BuildMI(B, At, DL, HII.get(Opc), Reg) + .addImm(int32_t(Hi)) + .addImm(int32_t(Lo)); + return Reg; + } + + BuildMI(B, At, DL, HII.get(Hexagon::CONST64_Int_Real), Reg) + .addImm(C); + return Reg; + } + + if (RC == &Hexagon::PredRegsRegClass) { + unsigned Opc; + if (C == 0) + Opc = Hexagon::TFR_PdFalse; + else if ((C & 0xFF) == 0xFF) + Opc = Hexagon::TFR_PdTrue; + else + return 0; + BuildMI(B, At, DL, HII.get(Opc), Reg); + return Reg; + } + + return 0; +} + + +bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { + bool Changed = false; + RegisterSet Defs; + + for (auto I = B.begin(), E = B.end(); I != E; ++I) { + if (isTfrConst(I)) + continue; + Defs.clear(); + HBS::getInstrDefs(*I, Defs); + if (Defs.count() != 1) + continue; + unsigned DR = Defs.find_first(); + if (!TargetRegisterInfo::isVirtualRegister(DR)) + continue; + int64_t C; + if (isConst(DR, C)) { + DebugLoc DL = I->getDebugLoc(); + auto At = I->isPHI() ? B.getFirstNonPHI() : I; + unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL); + if (ImmReg) { + HBS::replaceReg(DR, ImmReg, MRI); + BT.put(ImmReg, BT.lookup(DR)); + Changed = true; + } + } + } + return Changed; +} + + +// +// Copy generation +// +// Identify pairs of available registers which hold identical values. 
+// In such cases, only one of them needs to be calculated, the other one
+// will be defined as a copy of the first.
+//
+// Copy propagation
+//
+// Eliminate register copies RD = RS, by replacing the uses of RD with
+// uses of RS.
+namespace {
+ class CopyGeneration : public Transformation {
+ public:
+ CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ bool findMatch(const BitTracker::RegisterRef &Inp,
+ BitTracker::RegisterRef &Out, const RegisterSet &AVs);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+
+ class CopyPropagation : public Transformation {
+ public:
+ CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
+ : Transformation(false), MRI(mri) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ static bool isCopyReg(unsigned Opc);
+ private:
+ bool propagateRegCopy(MachineInstr &MI);
+
+ MachineRegisterInfo &MRI;
+ };
+
+}
+
+
+/// Check if there is a register in AVs that is identical to Inp. If so,
+/// set Out to the found register. The output may be a pair Reg:Sub.
+bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
+ BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
+ if (!BT.has(Inp.Reg))
+ return false;
+ const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
+ unsigned B, W;
+ if (!HBS::getSubregMask(Inp, B, W, MRI))
+ return false;
+
+ for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
+ if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI))
+ continue;
+ const BitTracker::RegisterCell &RC = BT.lookup(R);
+ unsigned RW = RC.width();
+ if (W == RW) {
+ if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R))
+ continue;
+ if (!HBS::isEqual(InpRC, B, RC, 0, W))
+ continue;
+ Out.Reg = R;
+ Out.Sub = 0;
+ return true;
+ }
+ // Check if there is a super-register, whose part (with a subregister)
+ // is equal to the input.
+ // Only do double registers for now.
+ if (W*2 != RW)
+ continue;
+ if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
+ continue;
+
+ if (HBS::isEqual(InpRC, B, RC, 0, W))
+ Out.Sub = Hexagon::subreg_loreg;
+ else if (HBS::isEqual(InpRC, B, RC, W, W))
+ Out.Sub = Hexagon::subreg_hireg;
+ else
+ continue;
+ Out.Reg = R;
+ return true;
+ }
+ return false;
+}
+
+
+bool CopyGeneration::processBlock(MachineBasicBlock &B,
+ const RegisterSet &AVs) {
+ RegisterSet AVB(AVs);
+ bool Changed = false;
+ RegisterSet Defs;
+
+ for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
+ ++I, AVB.insert(Defs)) {
+ NextI = std::next(I);
+ Defs.clear();
+ HBS::getInstrDefs(*I, Defs);
+
+ unsigned Opc = I->getOpcode();
+ if (CopyPropagation::isCopyReg(Opc))
+ continue;
+
+ for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
+ BitTracker::RegisterRef MR;
+ if (!findMatch(R, MR, AVB))
+ continue;
+ DebugLoc DL = I->getDebugLoc();
+ auto *FRC = HBS::getFinalVRegClass(MR, MRI);
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ auto At = I->isPHI() ?
B.getFirstNonPHI() : I; + BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(MR.Reg, 0, MR.Sub); + BT.put(BitTracker::RegisterRef(NewR), BT.get(MR)); + } + } + + return Changed; +} + + +bool CopyPropagation::isCopyReg(unsigned Opc) { + switch (Opc) { + case TargetOpcode::COPY: + case TargetOpcode::REG_SEQUENCE: + case Hexagon::A2_tfr: + case Hexagon::A2_tfrp: + case Hexagon::A2_combinew: + case Hexagon::A4_combineir: + case Hexagon::A4_combineri: + return true; + default: + break; + } + return false; +} + + +bool CopyPropagation::propagateRegCopy(MachineInstr &MI) { + bool Changed = false; + unsigned Opc = MI.getOpcode(); + BitTracker::RegisterRef RD = MI.getOperand(0); + assert(MI.getOperand(0).getSubReg() == 0); + + switch (Opc) { + case TargetOpcode::COPY: + case Hexagon::A2_tfr: + case Hexagon::A2_tfrp: { + BitTracker::RegisterRef RS = MI.getOperand(1); + if (!HBS::isTransparentCopy(RD, RS, MRI)) + break; + if (RS.Sub != 0) + Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI); + else + Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI); + break; + } + case TargetOpcode::REG_SEQUENCE: { + BitTracker::RegisterRef SL, SH; + if (HBS::parseRegSequence(MI, SL, SH)) { + Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg, + SL.Reg, SL.Sub, MRI); + Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg, + SH.Reg, SH.Sub, MRI); + } + break; + } + case Hexagon::A2_combinew: { + BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2); + Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg, + RL.Reg, RL.Sub, MRI); + Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg, + RH.Reg, RH.Sub, MRI); + break; + } + case Hexagon::A4_combineir: + case Hexagon::A4_combineri: { + unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1; + unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg + : Hexagon::subreg_hireg; + BitTracker::RegisterRef RS = MI.getOperand(SrcX); + Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI); + break; + } + } + return Changed; +} + + +bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) { + std::vector<MachineInstr*> Instrs; + for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) + Instrs.push_back(&*I); + + bool Changed = false; + for (auto I : Instrs) { + unsigned Opc = I->getOpcode(); + if (!CopyPropagation::isCopyReg(Opc)) + continue; + Changed |= propagateRegCopy(*I); + } + + return Changed; +} + + +// +// Bit simplification +// +// Recognize patterns that can be simplified and replace them with the +// simpler forms. +// This is by no means complete +namespace { + class BitSimplification : public Transformation { + public: + BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + struct RegHalf : public BitTracker::RegisterRef { + bool Low; // Low/High halfword. 
+ }; + + bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC, + unsigned B, RegHalf &RH); + + bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC, + BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt); + unsigned getCombineOpcode(bool HLow, bool LLow); + + bool genStoreUpperHalf(MachineInstr *MI); + bool genStoreImmediate(MachineInstr *MI); + bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + + +// Check if the bits [B..B+16) in register cell RC form a valid halfword, +// i.e. [0..16), [16..32), etc. of some register. If so, return true and +// set the information about the found register in RH. +bool BitSimplification::matchHalf(unsigned SelfR, + const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) { + // XXX This could be searching in the set of available registers, in case + // the match is not exact. + + // Match 16-bit chunks, where the RC[B..B+15] references exactly one + // register and all the bits B..B+15 match between RC and the register. + // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... }, + // and RC = { [0]:0 [1-15]:v1[1-15]... }. + bool Low = false; + unsigned I = B; + while (I < B+16 && RC[I].num()) + I++; + if (I == B+16) + return false; + + unsigned Reg = RC[I].RefI.Reg; + unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B. + if (P < I-B) + return false; + unsigned Pos = P - (I-B); + + if (Reg == 0 || Reg == SelfR) // Don't match "self". + return false; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (!BT.has(Reg)) + return false; + + const BitTracker::RegisterCell &SC = BT.lookup(Reg); + if (Pos+16 > SC.width()) + return false; + + for (unsigned i = 0; i < 16; ++i) { + const BitTracker::BitValue &RV = RC[i+B]; + if (RV.Type == BitTracker::BitValue::Ref) { + if (RV.RefI.Reg != Reg) + return false; + if (RV.RefI.Pos != i+Pos) + return false; + continue; + } + if (RC[i+B] != SC[i+Pos]) + return false; + } + + unsigned Sub = 0; + switch (Pos) { + case 0: + Sub = Hexagon::subreg_loreg; + Low = true; + break; + case 16: + Sub = Hexagon::subreg_loreg; + Low = false; + break; + case 32: + Sub = Hexagon::subreg_hireg; + Low = true; + break; + case 48: + Sub = Hexagon::subreg_hireg; + Low = false; + break; + default: + return false; + } + + RH.Reg = Reg; + RH.Sub = Sub; + RH.Low = Low; + // If the subregister is not valid with the register, set it to 0. + if (!HBS::getFinalVRegClass(RH, MRI)) + RH.Sub = 0; + + return true; +} + + +// Check if RC matches the pattern of a S2_packhl. If so, return true and +// set the inputs Rs and Rt. 
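+// In terms of halfwords the match is (hypothetical Rs/Rt):
+//   RC = { Rt.l [0-15], Rs.l [16-31], Rt.h [32-47], Rs.h [48-63] }
+// which is exactly the output layout of S2_packhl(Rs, Rt).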
+bool BitSimplification::matchPackhl(unsigned SelfR,
+ const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
+ BitTracker::RegisterRef &Rt) {
+ RegHalf L1, H1, L2, H2;
+
+ if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1))
+ return false;
+ if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
+ return false;
+
+ // Rs = H1.L1, Rt = H2.L2
+ if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
+ return false;
+ if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
+ return false;
+
+ Rs = H1;
+ Rt = H2;
+ return true;
+}
+
+
+unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
+ return HLow ? LLow ? Hexagon::A2_combine_ll
+ : Hexagon::A2_combine_lh
+ : LLow ? Hexagon::A2_combine_hl
+ : Hexagon::A2_combine_hh;
+}
+
+
+// If MI stores the upper halfword of a register (potentially obtained via
+// shifts or extracts), replace it with a storerf instruction. This could
+// cause the "extraction" code to become dead.
+bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc != Hexagon::S2_storerh_io)
+ return false;
+
+ MachineOperand &ValOp = MI->getOperand(2);
+ BitTracker::RegisterRef RS = ValOp;
+ if (!BT.has(RS.Reg))
+ return false;
+ const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
+ RegHalf H;
+ if (!matchHalf(0, RC, 0, H))
+ return false;
+ if (H.Low)
+ return false;
+ MI->setDesc(HII.get(Hexagon::S2_storerf_io));
+ ValOp.setReg(H.Reg);
+ ValOp.setSubReg(H.Sub);
+ return true;
+}
+
+
+// If MI stores a value known at compile-time, and the value is within a range
+// that avoids using constant-extenders, replace it with a store-immediate.
+bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ unsigned Align = 0;
+ switch (Opc) {
+ case Hexagon::S2_storeri_io:
+ Align++; // Fall through.
+ case Hexagon::S2_storerh_io:
+ Align++; // Fall through.
+ case Hexagon::S2_storerb_io:
+ break;
+ default:
+ return false;
+ }
+
+ // Avoid stores to frame-indices (due to an unknown offset).
+ if (!MI->getOperand(0).isReg())
+ return false;
+ MachineOperand &OffOp = MI->getOperand(1);
+ if (!OffOp.isImm())
+ return false;
+
+ int64_t Off = OffOp.getImm();
+ // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
+ if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
+ return false;
+ // Source register:
+ BitTracker::RegisterRef RS = MI->getOperand(2);
+ if (!BT.has(RS.Reg))
+ return false;
+ const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
+ uint64_t U;
+ if (!HBS::getConst(RC, 0, RC.width(), U))
+ return false;
+
+ // Only consider 8-bit values to avoid constant-extenders.
+ int V;
+ switch (Opc) {
+ case Hexagon::S2_storerb_io:
+ V = int8_t(U);
+ break;
+ case Hexagon::S2_storerh_io:
+ V = int16_t(U);
+ break;
+ case Hexagon::S2_storeri_io:
+ V = int32_t(U);
+ break;
+ }
+ if (!isInt<8>(V))
+ return false;
+
+ MI->RemoveOperand(2);
+ switch (Opc) {
+ case Hexagon::S2_storerb_io:
+ MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
+ break;
+ case Hexagon::S2_storerh_io:
+ MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
+ break;
+ case Hexagon::S2_storeri_io:
+ MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
+ break;
+ }
+ MI->addOperand(MachineOperand::CreateImm(V));
+ return true;
+}
+
+
+// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
+// last instruction in a sequence that results in something equivalent to
+// the pack-halfwords. The intent is to cause the entire sequence to become
+// dead.
+bool BitSimplification::genPackhl(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == Hexagon::S2_packhl)
+ return false;
+ BitTracker::RegisterRef Rs, Rt;
+ if (!matchPackhl(RD.Reg, RC, Rs, Rt))
+ return false;
+
+ MachineBasicBlock &B = *MI->getParent();
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(B, MI, DL, HII.get(Hexagon::S2_packhl), NewR)
+ .addReg(Rs.Reg, 0, Rs.Sub)
+ .addReg(Rt.Reg, 0, Rt.Sub);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+}
+
+
+// If MI produces a halfword of the input in the low half of the output,
+// replace it with zero-extend or extractu.
+bool BitSimplification::genExtractHalf(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ RegHalf L;
+ // Check for halfword in low 16 bits, zeros elsewhere.
+ if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Prefer zxth, since zxth can go in any slot, while extractu only in
+ // slots 2 and 3.
+ unsigned NewR = 0;
+ if (L.Low && Opc != Hexagon::A2_zxth) {
+ NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(Hexagon::A2_zxth), NewR)
+ .addReg(L.Reg, 0, L.Sub);
+ } else if (!L.Low && Opc != Hexagon::S2_extractu) {
+ NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(Hexagon::S2_extractu), NewR)
+ .addReg(L.Reg, 0, L.Sub)
+ .addImm(16)
+ .addImm(16);
+ }
+ if (NewR == 0)
+ return false;
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+}
+
+
+// If MI is equivalent to a combine(.L/.H, .L/.H) replace it with the
+// combine.
+bool BitSimplification::genCombineHalf(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ RegHalf L, H;
+ // Check for combine h/l
+ if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
+ return false;
+ // Do nothing if this is just a reg copy.
+ if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ unsigned COpc = getCombineOpcode(H.Low, L.Low);
+ if (COpc == Opc)
+ return false;
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(COpc), NewR)
+ .addReg(H.Reg, 0, H.Sub)
+ .addReg(L.Reg, 0, L.Sub);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+}
+
+
+// If MI resets high bits of a register and keeps the lower ones, replace it
+// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
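+// For example (hypothetical vregs): if only bits [0-11] of the result of
+//   vreg2 = A2_and vreg1, vreg0
+// can be nonzero and they equal vreg1[0-11], the definition can become
+//   vreg3 = S2_extractu vreg1, #12, #0
+// (zxtb, zxth, or andir would be used instead for widths of 8, 16, or
+// fewer than 10 bits).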
+bool BitSimplification::genExtractLow(MachineInstr *MI, + BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + case Hexagon::S2_extractu: + return false; + } + if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) { + int32_t Imm = MI->getOperand(2).getImm(); + if (isInt<10>(Imm)) + return false; + } + + if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) + return false; + unsigned W = RC.width(); + while (W > 0 && RC[W-1].is(0)) + W--; + if (W == 0 || W == RC.width()) + return false; + unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb + : (W == 16) ? Hexagon::A2_zxth + : (W < 10) ? Hexagon::A2_andir + : Hexagon::S2_extractu; + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + for (auto &Op : MI->uses()) { + if (!Op.isReg()) + continue; + BitTracker::RegisterRef RS = Op; + if (!BT.has(RS.Reg)) + continue; + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + unsigned BN, BW; + if (!HBS::getSubregMask(RS, BN, BW, MRI)) + continue; + if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W)) + continue; + + unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + auto MIB = BuildMI(B, MI, DL, HII.get(NewOpc), NewR) + .addReg(RS.Reg, 0, RS.Sub); + if (NewOpc == Hexagon::A2_andir) + MIB.addImm((1 << W) - 1); + else if (NewOpc == Hexagon::S2_extractu) + MIB.addImm(W).addImm(0); + HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); + BT.put(BitTracker::RegisterRef(NewR), RC); + return true; + } + return false; +} + + +// Check for tstbit simplification opportunity, where the bit being checked +// can be tracked back to another register. For example: +// vreg2 = S2_lsr_i_r vreg1, 5 +// vreg3 = S2_tstbit_i vreg2, 0 +// => +// vreg3 = S2_tstbit_i vreg1, 5 +bool BitSimplification::simplifyTstbit(MachineInstr *MI, + BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { + unsigned Opc = MI->getOpcode(); + if (Opc != Hexagon::S2_tstbit_i) + return false; + + unsigned BN = MI->getOperand(2).getImm(); + BitTracker::RegisterRef RS = MI->getOperand(1); + unsigned F, W; + DebugLoc DL = MI->getDebugLoc(); + if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI)) + return false; + MachineBasicBlock &B = *MI->getParent(); + + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + const BitTracker::BitValue &V = SC[F+BN]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) { + const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg); + // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is + // a double register, need to use a subregister and adjust bit + // number. + unsigned P = UINT_MAX; + BitTracker::RegisterRef RR(V.RefI.Reg, 0); + if (TC == &Hexagon::DoubleRegsRegClass) { + P = V.RefI.Pos; + RR.Sub = Hexagon::subreg_loreg; + if (P >= 32) { + P -= 32; + RR.Sub = Hexagon::subreg_hireg; + } + } else if (TC == &Hexagon::IntRegsRegClass) { + P = V.RefI.Pos; + } + if (P != UINT_MAX) { + unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + BuildMI(B, MI, DL, HII.get(Hexagon::S2_tstbit_i), NewR) + .addReg(RR.Reg, 0, RR.Sub) + .addImm(P); + HBS::replaceReg(RD.Reg, NewR, MRI); + BT.put(NewR, RC); + return true; + } + } else if (V.is(0) || V.is(1)) { + unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + unsigned NewOpc = V.is(0) ? 
Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue;
+ BuildMI(B, MI, DL, HII.get(NewOpc), NewR);
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ return true;
+ }
+
+ return false;
+}
+
+
+bool BitSimplification::processBlock(MachineBasicBlock &B,
+ const RegisterSet &AVs) {
+ bool Changed = false;
+ RegisterSet AVB = AVs;
+ RegisterSet Defs;
+
+ for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
+ MachineInstr *MI = &*I;
+ Defs.clear();
+ HBS::getInstrDefs(*MI, Defs);
+
+ unsigned Opc = MI->getOpcode();
+ if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
+ continue;
+
+ if (MI->mayStore()) {
+ bool T = genStoreUpperHalf(MI);
+ T = T || genStoreImmediate(MI);
+ Changed |= T;
+ continue;
+ }
+
+ if (Defs.count() != 1)
+ continue;
+ const MachineOperand &Op0 = MI->getOperand(0);
+ if (!Op0.isReg() || !Op0.isDef())
+ continue;
+ BitTracker::RegisterRef RD = Op0;
+ if (!BT.has(RD.Reg))
+ continue;
+ const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+ const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);
+
+ if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
+ bool T = genPackhl(MI, RD, RC);
+ Changed |= T;
+ continue;
+ }
+
+ if (FRC->getID() == Hexagon::IntRegsRegClassID) {
+ bool T = genExtractHalf(MI, RD, RC);
+ T = T || genCombineHalf(MI, RD, RC);
+ T = T || genExtractLow(MI, RD, RC);
+ Changed |= T;
+ continue;
+ }
+
+ if (FRC->getID() == Hexagon::PredRegsRegClassID) {
+ bool T = simplifyTstbit(MI, RD, RC);
+ Changed |= T;
+ continue;
+ }
+ }
+ return Changed;
+}
+
+
+bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HRI = *HST.getRegisterInfo();
+ auto &HII = *HST.getInstrInfo();
+
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool Changed;
+
+ Changed = DeadCodeElimination(MF, *MDT).run();
+
+ const HexagonEvaluator HE(HRI, MRI, HII, MF);
+ BitTracker BT(HE, MF);
+ DEBUG(BT.trace(true));
+ BT.run();
+
+ MachineBasicBlock &Entry = MF.front();
+
+ RegisterSet AIG; // Available registers for IG.
+ ConstGeneration ImmG(BT, HII, MRI);
+ Changed |= visitBlock(Entry, ImmG, AIG);
+
+ RegisterSet ARE; // Available registers for RIE.
+ RedundantInstrElimination RIE(BT, HII, MRI);
+ Changed |= visitBlock(Entry, RIE, ARE);
+
+ RegisterSet ACG; // Available registers for CG.
+ CopyGeneration CopyG(BT, HII, MRI);
+ Changed |= visitBlock(Entry, CopyG, ACG);
+
+ RegisterSet ACP; // Available registers for CP.
+ CopyPropagation CopyP(HRI, MRI);
+ Changed |= visitBlock(Entry, CopyP, ACP);
+
+ Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
+
+ BT.run();
+ RegisterSet ABS; // Available registers for BS.
+ BitSimplification BitS(BT, HII, MRI);
+ Changed |= visitBlock(Entry, BitS, ABS);
+
+ Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
+
+ if (Changed) {
+ for (auto &B : MF)
+ for (auto &I : B)
+ I.clearKillInfo();
+ DeadCodeElimination(MF, *MDT).run();
+ }
+ return Changed;
+}
+
+
+// Recognize loops where the code at the end of the loop matches the code
+// before the entry of the loop, and the matching code is such that it can
+// be simplified. This pass relies on the bit simplification above and only
+// prepares code in a way that can be handled by the bit simplification.
+// +// This is the motivating testcase (and explanation): +// +// { +// loop0(.LBB0_2, r1) // %for.body.preheader +// r5:4 = memd(r0++#8) +// } +// { +// r3 = lsr(r4, #16) +// r7:6 = combine(r5, r5) +// } +// { +// r3 = insert(r5, #16, #16) +// r7:6 = vlsrw(r7:6, #16) +// } +// .LBB0_2: +// { +// memh(r2+#4) = r5 +// memh(r2+#6) = r6 # R6 is really R5.H +// } +// { +// r2 = add(r2, #8) +// memh(r2+#0) = r4 +// memh(r2+#2) = r3 # R3 is really R4.H +// } +// { +// r5:4 = memd(r0++#8) +// } +// { # "Shuffling" code that sets up R3 and R6 +// r3 = lsr(r4, #16) # so that their halves can be stored in the +// r7:6 = combine(r5, r5) # next iteration. This could be folded into +// } # the stores if the code was at the beginning +// { # of the loop iteration. Since the same code +// r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved +// r7:6 = vlsrw(r7:6, #16) # there. +// }:endloop0 +// +// +// The outcome: +// +// { +// loop0(.LBB0_2, r1) +// r5:4 = memd(r0++#8) +// } +// .LBB0_2: +// { +// memh(r2+#4) = r5 +// memh(r2+#6) = r5.h +// } +// { +// r2 = add(r2, #8) +// memh(r2+#0) = r4 +// memh(r2+#2) = r4.h +// } +// { +// r5:4 = memd(r0++#8) +// }:endloop0 + +namespace llvm { + FunctionPass *createHexagonLoopRescheduling(); + void initializeHexagonLoopReschedulingPass(PassRegistry&); +} + +namespace { + class HexagonLoopRescheduling : public MachineFunctionPass { + public: + static char ID; + HexagonLoopRescheduling() : MachineFunctionPass(ID), + HII(0), HRI(0), MRI(0), BTP(0) { + initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + MachineRegisterInfo *MRI; + BitTracker *BTP; + + struct LoopCand { + LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb, + MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {} + MachineBasicBlock *LB, *PB, *EB; + }; + typedef std::vector<MachineInstr*> InstrList; + struct InstrGroup { + BitTracker::RegisterRef Inp, Out; + InstrList Ins; + }; + struct PhiInfo { + PhiInfo(MachineInstr &P, MachineBasicBlock &B); + unsigned DefR; + BitTracker::RegisterRef LR, PR; + MachineBasicBlock *LB, *PB; + }; + + static unsigned getDefReg(const MachineInstr *MI); + bool isConst(unsigned Reg) const; + bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const; + bool isStoreInput(const MachineInstr *MI, unsigned DefR) const; + bool isShuffleOf(unsigned OutR, unsigned InpR) const; + bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2, + unsigned &InpR2) const; + void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB, + MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR); + bool processLoop(LoopCand &C); + }; +} + +char HexagonLoopRescheduling::ID = 0; + +INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched", + "Hexagon Loop Rescheduling", false, false) + + +HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P, + MachineBasicBlock &B) { + DefR = HexagonLoopRescheduling::getDefReg(&P); + LB = &B; + PB = nullptr; + for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) { + const MachineOperand &OpB = P.getOperand(i+1); + if (OpB.getMBB() == &B) { + LR = P.getOperand(i); + continue; + } + PB = OpB.getMBB(); + PR = P.getOperand(i); + } +} + + +unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) { + RegisterSet Defs; + HBS::getInstrDefs(*MI, Defs); + if (Defs.count() != 1) + return 0; + return Defs.find_first(); +} + + 
+bool HexagonLoopRescheduling::isConst(unsigned Reg) const { + if (!BTP->has(Reg)) + return false; + const BitTracker::RegisterCell &RC = BTP->lookup(Reg); + for (unsigned i = 0, w = RC.width(); i < w; ++i) { + const BitTracker::BitValue &V = RC[i]; + if (!V.is(0) && !V.is(1)) + return false; + } + return true; +} + + +bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI, + unsigned DefR) const { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::COPY: + case Hexagon::S2_lsr_i_r: + case Hexagon::S2_asr_i_r: + case Hexagon::S2_asl_i_r: + case Hexagon::S2_lsr_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_asl_i_p: + case Hexagon::S2_insert: + case Hexagon::A2_or: + case Hexagon::A2_orp: + case Hexagon::A2_and: + case Hexagon::A2_andp: + case Hexagon::A2_combinew: + case Hexagon::A4_combineri: + case Hexagon::A4_combineir: + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + case Hexagon::A2_combine_ll: + case Hexagon::A2_combine_lh: + case Hexagon::A2_combine_hl: + case Hexagon::A2_combine_hh: + return true; + } + return false; +} + + +bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI, + unsigned InpR) const { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg()) + continue; + if (Op.getReg() == InpR) + return i == n-1; + } + return false; +} + + +bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const { + if (!BTP->has(OutR) || !BTP->has(InpR)) + return false; + const BitTracker::RegisterCell &OutC = BTP->lookup(OutR); + for (unsigned i = 0, w = OutC.width(); i < w; ++i) { + const BitTracker::BitValue &V = OutC[i]; + if (V.Type != BitTracker::BitValue::Ref) + continue; + if (V.RefI.Reg != InpR) + return false; + } + return true; +} + + +bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1, + unsigned OutR2, unsigned &InpR2) const { + if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2)) + return false; + const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1); + const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2); + unsigned W = OutC1.width(); + unsigned MatchR = 0; + if (W != OutC2.width()) + return false; + for (unsigned i = 0; i < W; ++i) { + const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i]; + if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One) + return false; + if (V1.Type != BitTracker::BitValue::Ref) + continue; + if (V1.RefI.Pos != V2.RefI.Pos) + return false; + if (V1.RefI.Reg != InpR1) + return false; + if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2) + return false; + if (!MatchR) + MatchR = V2.RefI.Reg; + else if (V2.RefI.Reg != MatchR) + return false; + } + InpR2 = MatchR; + return true; +} + + +void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, + MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR, + unsigned NewPredR) { + DenseMap<unsigned,unsigned> RegMap; + + const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR); + unsigned PhiR = MRI->createVirtualRegister(PhiRC); + BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR) + .addReg(NewPredR) + .addMBB(&PB) + .addReg(G.Inp.Reg) + .addMBB(&LB); + RegMap.insert(std::make_pair(G.Inp.Reg, PhiR)); + + for (unsigned i = G.Ins.size(); i > 0; --i) { + const MachineInstr *SI = G.Ins[i-1]; + unsigned DR = getDefReg(SI); + const TargetRegisterClass *RC = MRI->getRegClass(DR); + unsigned NewDR = MRI->createVirtualRegister(RC); + DebugLoc DL = 
SI->getDebugLoc(); + + auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR); + for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) { + const MachineOperand &Op = SI->getOperand(j); + if (!Op.isReg()) { + MIB.addOperand(Op); + continue; + } + if (!Op.isUse()) + continue; + unsigned UseR = RegMap[Op.getReg()]; + MIB.addReg(UseR, 0, Op.getSubReg()); + } + RegMap.insert(std::make_pair(DR, NewDR)); + } + + HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI); +} + + +bool HexagonLoopRescheduling::processLoop(LoopCand &C) { + DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n"); + std::vector<PhiInfo> Phis; + for (auto &I : *C.LB) { + if (!I.isPHI()) + break; + unsigned PR = getDefReg(&I); + if (isConst(PR)) + continue; + bool BadUse = false, GoodUse = false; + for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseI = UI->getParent(); + if (UseI->getParent() != C.LB) { + BadUse = true; + break; + } + if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR)) + GoodUse = true; + } + if (BadUse || !GoodUse) + continue; + + Phis.push_back(PhiInfo(I, *C.LB)); + } + + DEBUG({ + dbgs() << "Phis: {"; + for (auto &I : Phis) { + dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi(" + << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber() + << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b" + << I.LB->getNumber() << ')'; + } + dbgs() << " }\n"; + }); + + if (Phis.empty()) + return false; + + bool Changed = false; + InstrList ShufIns; + + // Go backwards in the block: for each bit shuffling instruction, check + // if that instruction could potentially be moved to the front of the loop: + // the output of the loop cannot be used in a non-shuffling instruction + // in this loop. + for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) { + if (I->isTerminator()) + continue; + if (I->isPHI()) + break; + + RegisterSet Defs; + HBS::getInstrDefs(*I, Defs); + if (Defs.count() != 1) + continue; + unsigned DefR = Defs.find_first(); + if (!TargetRegisterInfo::isVirtualRegister(DefR)) + continue; + if (!isBitShuffle(&*I, DefR)) + continue; + + bool BadUse = false; + for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseI = UI->getParent(); + if (UseI->getParent() == C.LB) { + if (UseI->isPHI()) { + // If the use is in a phi node in this loop, then it should be + // the value corresponding to the back edge. + unsigned Idx = UI.getOperandNo(); + if (UseI->getOperand(Idx+1).getMBB() != C.LB) + BadUse = true; + } else { + auto F = std::find(ShufIns.begin(), ShufIns.end(), UseI); + if (F == ShufIns.end()) + BadUse = true; + } + } else { + // There is a use outside of the loop, but there is no epilog block + // suitable for a copy-out. + if (C.EB == nullptr) + BadUse = true; + } + if (BadUse) + break; + } + + if (BadUse) + continue; + ShufIns.push_back(&*I); + } + + // Partition the list of shuffling instructions into instruction groups, + // where each group has to be moved as a whole (i.e. a group is a chain of + // dependent instructions). A group produces a single live output register, + // which is meant to be the input of the loop phi node (although this is + // not checked here yet). It also uses a single register as its input, + // which is some value produced in the loop body. After moving the group + // to the beginning of the loop, that input register would need to be + // the loop-carried register (through a phi node) instead of the (currently + // loop-carried) output register. 
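+// For example (hypothetical vregs), the pair
+//   vreg3 = S2_lsr_i_r vreg1, #16
+//   vreg4 = A2_combine_ll vreg3, vreg1
+// forms a single group with output vreg4 and the single outside input vreg1.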
+ typedef std::vector<InstrGroup> InstrGroupList; + InstrGroupList Groups; + + for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) { + MachineInstr *SI = ShufIns[i]; + if (SI == nullptr) + continue; + + InstrGroup G; + G.Ins.push_back(SI); + G.Out.Reg = getDefReg(SI); + RegisterSet Inputs; + HBS::getInstrUses(*SI, Inputs); + + for (unsigned j = i+1; j < n; ++j) { + MachineInstr *MI = ShufIns[j]; + if (MI == nullptr) + continue; + RegisterSet Defs; + HBS::getInstrDefs(*MI, Defs); + // If this instruction does not define any pending inputs, skip it. + if (!Defs.intersects(Inputs)) + continue; + // Otherwise, add it to the current group and remove the inputs that + // are defined by MI. + G.Ins.push_back(MI); + Inputs.remove(Defs); + // Then add all registers used by MI. + HBS::getInstrUses(*MI, Inputs); + ShufIns[j] = nullptr; + } + + // Only add a group if it requires at most one register. + if (Inputs.count() > 1) + continue; + auto LoopInpEq = [G] (const PhiInfo &P) -> bool { + return G.Out.Reg == P.LR.Reg; + }; + if (std::find_if(Phis.begin(), Phis.end(), LoopInpEq) == Phis.end()) + continue; + + G.Inp.Reg = Inputs.find_first(); + Groups.push_back(G); + } + + DEBUG({ + for (unsigned i = 0, n = Groups.size(); i < n; ++i) { + InstrGroup &G = Groups[i]; + dbgs() << "Group[" << i << "] inp: " + << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub) + << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n"; + for (unsigned j = 0, m = G.Ins.size(); j < m; ++j) + dbgs() << " " << *G.Ins[j]; + } + }); + + for (unsigned i = 0, n = Groups.size(); i < n; ++i) { + InstrGroup &G = Groups[i]; + if (!isShuffleOf(G.Out.Reg, G.Inp.Reg)) + continue; + auto LoopInpEq = [G] (const PhiInfo &P) -> bool { + return G.Out.Reg == P.LR.Reg; + }; + auto F = std::find_if(Phis.begin(), Phis.end(), LoopInpEq); + if (F == Phis.end()) + continue; + unsigned PredR = 0; + if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PredR)) { + const MachineInstr *DefPredR = MRI->getVRegDef(F->PR.Reg); + unsigned Opc = DefPredR->getOpcode(); + if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi) + continue; + if (!DefPredR->getOperand(1).isImm()) + continue; + if (DefPredR->getOperand(1).getImm() != 0) + continue; + const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg); + if (RC != MRI->getRegClass(F->PR.Reg)) { + PredR = MRI->createVirtualRegister(RC); + unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi + : Hexagon::A2_tfrpi; + auto T = C.PB->getFirstTerminator(); + DebugLoc DL = (T != C.PB->end()) ? 
T->getDebugLoc() : DebugLoc();
+ BuildMI(*C.PB, T, DL, HII->get(TfrI), PredR)
+ .addImm(0);
+ } else {
+ PredR = F->PR.Reg;
+ }
+ }
+ assert(MRI->getRegClass(PredR) == MRI->getRegClass(G.Inp.Reg));
+ moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PredR);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ HII = HST.getInstrInfo();
+ HRI = HST.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
+ BitTracker BT(HE, MF);
+ DEBUG(BT.trace(true));
+ BT.run();
+ BTP = &BT;
+
+ std::vector<LoopCand> Cand;
+
+ for (auto &B : MF) {
+ if (B.pred_size() != 2 || B.succ_size() != 2)
+ continue;
+ MachineBasicBlock *PB = nullptr;
+ bool IsLoop = false;
+ for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
+ if (*PI != &B)
+ PB = *PI;
+ else
+ IsLoop = true;
+ }
+ if (!IsLoop)
+ continue;
+
+ MachineBasicBlock *EB = nullptr;
+ for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
+ if (*SI == &B)
+ continue;
+ // Set EB to the epilog block, if it has only 1 predecessor (i.e. the
+ // edge from B to EB is non-critical).
+ if ((*SI)->pred_size() == 1)
+ EB = *SI;
+ break;
+ }
+
+ Cand.push_back(LoopCand(&B, PB, EB));
+ }
+
+ bool Changed = false;
+ for (auto &C : Cand)
+ Changed |= processLoop(C);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonLoopRescheduling() {
+ return new HexagonLoopRescheduling();
+}
+
+FunctionPass *llvm::createHexagonBitSimplify() {
+ return new HexagonBitSimplify();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index 021e58a1d08a..d5848dc45a3b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -84,6 +84,8 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
 uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
 switch (ID) {
 case DoubleRegsRegClassID:
+ case VecDblRegsRegClassID:
+ case VecDblRegs128BRegClassID:
 return (Sub == subreg_loreg) ? BT::BitMask(0, RW-1)
 : BT::BitMask(RW, 2*RW-1);
 default:
@@ -95,30 +97,29 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
 llvm_unreachable("Unexpected register/subregister");
 }
-
 namespace {
- struct RegisterRefs : public std::vector<BT::RegisterRef> {
- typedef std::vector<BT::RegisterRef> Base;
- RegisterRefs(const MachineInstr *MI);
- const BT::RegisterRef &operator[](unsigned n) const {
- // The main purpose of this operator is to assert with bad argument.
- assert(n < size());
- return Base::operator[](n);
- }
- };
+class RegisterRefs {
+ std::vector<BT::RegisterRef> Vector;
 
- RegisterRefs::RegisterRefs(const MachineInstr *MI)
- : Base(MI->getNumOperands()) {
- for (unsigned i = 0, n = size(); i < n; ++i) {
+public:
+ RegisterRefs(const MachineInstr *MI) : Vector(MI->getNumOperands()) {
+ for (unsigned i = 0, n = Vector.size(); i < n; ++i) {
 const MachineOperand &MO = MI->getOperand(i);
 if (MO.isReg())
- at(i) = BT::RegisterRef(MO);
+ Vector[i] = BT::RegisterRef(MO);
 // For indices that don't correspond to registers, the entry will
 // remain constructed via the default constructor.
} } -} + size_t size() const { return Vector.size(); } + const BT::RegisterRef &operator[](unsigned n) const { + // The main purpose of this operator is to assert with bad argument. + assert(n < Vector.size()); + return Vector[n]; + } +}; +} bool HexagonEvaluator::evaluate(const MachineInstr *MI, const CellMapType &Inputs, CellMapType &Outputs) const { @@ -189,7 +190,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr *MI, return true; }; // Get the cell corresponding to the N-th operand. - auto cop = [this,Reg,MI,Inputs] (unsigned N, uint16_t W) + auto cop = [this,&Reg,&MI,&Inputs] (unsigned N, uint16_t W) -> BT::RegisterCell { const MachineOperand &Op = MI->getOperand(N); if (Op.isImm()) diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index 3753b745657b..efafdd007289 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -102,7 +102,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; // Traverse the basic block. MachineBasicBlock::iterator MII = MBB->getFirstTerminator(); @@ -186,13 +186,11 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { if (case1 || case2) { InvertAndChangeJumpTarget(MI, UncondTarget); - MBB->removeSuccessor(JumpAroundTarget); - MBB->addSuccessor(UncondTarget); + MBB->replaceSuccessor(JumpAroundTarget, UncondTarget); // Remove the unconditional branch in LayoutSucc. LayoutSucc->erase(LayoutSucc->begin()); - LayoutSucc->removeSuccessor(UncondTarget); - LayoutSucc->addSuccessor(JumpAroundTarget); + LayoutSucc->replaceSuccessor(UncondTarget, JumpAroundTarget); // This code performs the conversion for case 2, which moves // the block to the fall-thru case (BB3 in the code above). @@ -210,16 +208,15 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // The live-in to LayoutSucc is now all values live-in to // JumpAroundTarget. // - std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(), - LayoutSucc->livein_end()); - std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(), - JumpAroundTarget->livein_end()); - for (unsigned i = 0; i < OrigLiveIn.size(); ++i) { - LayoutSucc->removeLiveIn(OrigLiveIn[i]); - } - for (unsigned i = 0; i < NewLiveIn.size(); ++i) { - LayoutSucc->addLiveIn(NewLiveIn[i]); - } + std::vector<MachineBasicBlock::RegisterMaskPair> OrigLiveIn( + LayoutSucc->livein_begin(), LayoutSucc->livein_end()); + std::vector<MachineBasicBlock::RegisterMaskPair> NewLiveIn( + JumpAroundTarget->livein_begin(), + JumpAroundTarget->livein_end()); + for (const auto &OrigLI : OrigLiveIn) + LayoutSucc->removeLiveIn(OrigLI.PhysReg); + for (const auto &NewLI : NewLiveIn) + LayoutSucc->addLiveIn(NewLI); } } } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp index 9f5fac156527..931db6687bf8 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -59,30 +59,23 @@ namespace { // Numbering map for gep nodes. Used to keep track of ordering for // gep nodes. 
- struct NodeNumbering : public std::map<const GepNode*,unsigned> { - }; - - struct NodeOrdering : public NodeNumbering { + struct NodeOrdering { NodeOrdering() : LastNum(0) {} -#ifdef _MSC_VER - void special_insert_for_special_msvc(const GepNode *N) -#else - using NodeNumbering::insert; - void insert(const GepNode* N) -#endif - { - insert(std::make_pair(N, ++LastNum)); - } - bool operator() (const GepNode* N1, const GepNode *N2) const { - const_iterator F1 = find(N1), F2 = find(N2); - assert(F1 != end() && F2 != end()); + + void insert(const GepNode *N) { Map.insert(std::make_pair(N, ++LastNum)); } + void clear() { Map.clear(); } + + bool operator()(const GepNode *N1, const GepNode *N2) const { + auto F1 = Map.find(N1), F2 = Map.find(N2); + assert(F1 != Map.end() && F2 != Map.end()); return F1->second < F2->second; } + private: + std::map<const GepNode *, unsigned> Map; unsigned LastNum; }; - class HexagonCommonGEP : public FunctionPass { public: static char ID; @@ -360,11 +353,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, Us.insert(&UI.getUse()); } Nodes.push_back(N); -#ifdef _MSC_VER - NodeOrder.special_insert_for_special_msvc(N); -#else NodeOrder.insert(N); -#endif // Skip the first index operand, since we only handle 0. This dereferences // the pointer operand. @@ -379,11 +368,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, Nx->PTy = PtrTy; Nx->Idx = Op; Nodes.push_back(Nx); -#ifdef _MSC_VER - NodeOrder.special_insert_for_special_msvc(Nx); -#else NodeOrder.insert(Nx); -#endif PN = Nx; PtrTy = next_type(PtrTy, Op); @@ -404,7 +389,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, void HexagonCommonGEP::collect() { // Establish depth-first traversal order of the dominator tree. ValueVect BO; - getBlockTraversalOrder(Fn->begin(), BO); + getBlockTraversalOrder(&Fn->front(), BO); // The creation of gep nodes requires DT-traversal. 
When processing a GEP // instruction that uses another GEP instruction as the base pointer, the @@ -737,7 +722,7 @@ namespace { Instruction *In = cast<Instruction>(V); if (In->getParent() != B) continue; - BasicBlock::iterator It = In; + BasicBlock::iterator It = In->getIterator(); if (std::distance(FirstUse, BEnd) < std::distance(It, BEnd)) FirstUse = It; } @@ -1135,7 +1120,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, ArrayRef<Value*> A(IdxList, IdxC); Type *InpTy = Input->getType(); Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType(); - NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", At); + NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At); DEBUG(dbgs() << "new GEP: " << *NewInst << '\n'); Input = NewInst; } while (nax <= Num); @@ -1213,7 +1198,7 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) { Last = Child; } while (true); - BasicBlock::iterator InsertAt = LastB->getTerminator(); + BasicBlock::iterator InsertAt = LastB->getTerminator()->getIterator(); if (LastUsed || LastCN > 0) { ValueVect Urs; getAllUsersForNode(Root, Urs, NCM); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp new file mode 100644 index 000000000000..ee0c318ffb5d --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -0,0 +1,1063 @@ +//===--- HexagonEarlyIfConv.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a Hexagon-specific if-conversion pass that runs on the +// SSA form. +// In SSA it is not straightforward to represent instructions that condi- +// tionally define registers, since a conditionally-defined register may +// only be used under the same condition on which the definition was based. +// To avoid complications of this nature, this patch will only generate +// predicated stores, and speculate other instructions from the "if-conver- +// ted" block. +// The code will recognize CFG patterns where a block with a conditional +// branch "splits" into a "true block" and a "false block". Either of these +// could be omitted (in case of a triangle, for example). +// If after conversion of the side block(s) the CFG allows it, the resul- +// ting blocks may be merged. If the "join" block contained PHI nodes, they +// will be replaced with MUX (or MUX-like) instructions to maintain the +// semantics of the PHI. 
+// +// Example: +// +// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1 +// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0 +// J2_jumpt %vreg41<kill>, <BB#5>, %PC<imp-def,dead> +// J2_jump <BB#4>, %PC<imp-def,dead> +// Successors according to CFG: BB#4(62) BB#5(62) +// +// BB#4: derived from LLVM BB %if.then +// Predecessors according to CFG: BB#3 +// %vreg11<def> = A2_addp %vreg6, %vreg10 +// S2_storerd_io %vreg32, 16, %vreg11 +// Successors according to CFG: BB#5 +// +// BB#5: derived from LLVM BB %if.end +// Predecessors according to CFG: BB#3 BB#4 +// %vreg12<def> = PHI %vreg6, <BB#3>, %vreg11, <BB#4> +// %vreg13<def> = A2_addp %vreg7, %vreg12 +// %vreg42<def> = C2_cmpeqi %vreg9, 10 +// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead> +// J2_jump <BB#6>, %PC<imp-def,dead> +// Successors according to CFG: BB#6(4) BB#3(124) +// +// would become: +// +// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1 +// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0 +// spec-> %vreg11<def> = A2_addp %vreg6, %vreg10 +// pred-> S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11 +// %vreg46<def> = MUX64_rr %vreg41, %vreg6, %vreg11 +// %vreg13<def> = A2_addp %vreg7, %vreg46 +// %vreg42<def> = C2_cmpeqi %vreg9, 10 +// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead> +// J2_jump <BB#6>, %PC<imp-def,dead> +// Successors according to CFG: BB#6 BB#3 + +#define DEBUG_TYPE "hexagon-eif" + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "HexagonTargetMachine.h" + +#include <functional> +#include <set> +#include <vector> + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonEarlyIfConversion(); + void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry); +} + +namespace { + cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden, + cl::init(false), cl::desc("Enable branch probability info")); + cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden, + cl::desc("Size limit in Hexagon early if-conversion")); + + struct PrintMB { + PrintMB(const MachineBasicBlock *B) : MB(B) {} + const MachineBasicBlock *MB; + }; + raw_ostream &operator<< (raw_ostream &OS, const PrintMB &P) { + if (!P.MB) + return OS << "<none>"; + return OS << '#' << P.MB->getNumber(); + } + + struct FlowPattern { + FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {} + FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB, + MachineBasicBlock *FB, MachineBasicBlock *JB) + : SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {} + + MachineBasicBlock *SplitB; + MachineBasicBlock *TrueB, *FalseB, *JoinB; + unsigned PredR; + }; + struct PrintFP { + PrintFP(const FlowPattern &P, const TargetRegisterInfo &T) + : FP(P), TRI(T) {} + const FlowPattern &FP; + const TargetRegisterInfo &TRI; + friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P); + }; + raw_ostream &operator<<(raw_ostream &OS, + const PrintFP &P) LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) { + OS << "{ SplitB:" << PrintMB(P.FP.SplitB) + << 
", PredR:" << PrintReg(P.FP.PredR, &P.TRI) + << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:" + << PrintMB(P.FP.FalseB) + << ", JoinB:" << PrintMB(P.FP.JoinB) << " }"; + return OS; + } + + class HexagonEarlyIfConversion : public MachineFunctionPass { + public: + static char ID; + HexagonEarlyIfConversion() : MachineFunctionPass(ID), + TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) { + initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon early if conversion"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + typedef DenseSet<MachineBasicBlock*> BlockSetType; + + bool isPreheader(const MachineBasicBlock *B) const; + bool matchFlowPattern(MachineBasicBlock *B, MachineLoop *L, + FlowPattern &FP); + bool visitBlock(MachineBasicBlock *B, MachineLoop *L); + bool visitLoop(MachineLoop *L); + + bool hasEHLabel(const MachineBasicBlock *B) const; + bool hasUncondBranch(const MachineBasicBlock *B) const; + bool isValidCandidate(const MachineBasicBlock *B) const; + bool usesUndefVReg(const MachineInstr *MI) const; + bool isValid(const FlowPattern &FP) const; + unsigned countPredicateDefs(const MachineBasicBlock *B) const; + unsigned computePhiCost(MachineBasicBlock *B) const; + bool isProfitable(const FlowPattern &FP) const; + bool isPredicableStore(const MachineInstr *MI) const; + bool isSafeToSpeculate(const MachineInstr *MI) const; + + unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const; + void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At, + MachineInstr *MI, unsigned PredR, bool IfTrue); + void predicateBlockNB(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineBasicBlock *FromB, + unsigned PredR, bool IfTrue); + + void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP); + void convert(const FlowPattern &FP); + + void removeBlock(MachineBasicBlock *B); + void eliminatePhis(MachineBasicBlock *B); + void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB); + void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB); + void simplifyFlowGraph(const FlowPattern &FP); + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineFunction *MFN; + MachineRegisterInfo *MRI; + MachineDominatorTree *MDT; + MachineLoopInfo *MLI; + BlockSetType Deleted; + const MachineBranchProbabilityInfo *MBPI; + }; + + char HexagonEarlyIfConversion::ID = 0; +} + +INITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif", + "Hexagon early if conversion", false, false) + +bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const { + if (B->succ_size() != 1) + return false; + MachineBasicBlock *SB = *B->succ_begin(); + MachineLoop *L = MLI->getLoopFor(SB); + return L && SB == L->getHeader(); +} + + +bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, + MachineLoop *L, FlowPattern &FP) { + DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n"); + + // Interested only in conditional branches, no .new, no new-value, etc. + // Check the terminators directly, it's easier than handling all responses + // from AnalyzeBranch. 
+ MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock::const_iterator T1I = B->getFirstTerminator(); + if (T1I == B->end()) + return false; + unsigned Opc = T1I->getOpcode(); + if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf) + return false; + unsigned PredR = T1I->getOperand(0).getReg(); + + // Get the layout successor, or 0 if B does not have one. + MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B)); + MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0; + + MachineBasicBlock *T1B = T1I->getOperand(1).getMBB(); + MachineBasicBlock::const_iterator T2I = std::next(T1I); + // The second terminator should be an unconditional branch. + assert(T2I == B->end() || T2I->getOpcode() == Hexagon::J2_jump); + MachineBasicBlock *T2B = (T2I == B->end()) ? NextB + : T2I->getOperand(0).getMBB(); + if (T1B == T2B) { + // XXX merge if T1B == NextB, or convert branch to unconditional. + // mark as diamond with both sides equal? + return false; + } + // Loop could be null for both. + if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L) + return false; + + // Record the true/false blocks in such a way that "true" means "if (PredR)", + // and "false" means "if (!PredR)". + if (Opc == Hexagon::J2_jumpt) + TB = T1B, FB = T2B; + else + TB = T2B, FB = T1B; + + if (!MDT->properlyDominates(B, TB) || !MDT->properlyDominates(B, FB)) + return false; + + // Detect triangle first. In case of a triangle, one of the blocks TB/FB + // can fall through into the other, in other words, it will be executed + // in both cases. We only want to predicate the block that is executed + // conditionally. + unsigned TNP = TB->pred_size(), FNP = FB->pred_size(); + unsigned TNS = TB->succ_size(), FNS = FB->succ_size(); + + // A block is predicable if it has one predecessor (it must be B), and + // it has a single successor. In fact, the block has to end either with + // an unconditional branch (which can be predicated), or with a fall- + // through. + bool TOk = (TNP == 1) && (TNS == 1); + bool FOk = (FNP == 1) && (FNS == 1); + + // If neither is predicable, there is nothing interesting. + if (!TOk && !FOk) + return false; + + MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0; + MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0; + MachineBasicBlock *JB = 0; + + if (TOk) { + if (FOk) { + if (TSB == FSB) + JB = TSB; + // Diamond: "if (P) then TB; else FB;". + } else { + // TOk && !FOk + if (TSB == FB) { + JB = FB; + FB = 0; + } + } + } else { + // !TOk && FOk (at least one must be true by now). + if (FSB == TB) { + JB = TB; + TB = 0; + } + } + // Don't try to predicate loop preheaders. + if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) { + DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB) + << " is a loop preheader. Skipping.\n"); + return false; + } + + FP = FlowPattern(B, PredR, TB, FB, JB); + DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n"); + return true; +} + + +// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that +// contains EH_LABEL. +bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { + for (auto &I : *B) + if (I.isEHLabel()) + return true; + return false; +} + + +// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize +// that a block can never fall-through. 
+bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B) + const { + MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); + while (I != E) { + if (I->isBarrier()) + return true; + ++I; + } + return false; +} + + +bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) + const { + if (!B) + return true; + if (B->isEHPad() || B->hasAddressTaken()) + return false; + if (B->succ_size() == 0) + return false; + + for (auto &MI : *B) { + if (MI.isDebugValue()) + continue; + if (MI.isConditionalBranch()) + return false; + unsigned Opc = MI.getOpcode(); + bool IsJMP = (Opc == Hexagon::J2_jump); + if (!isPredicableStore(&MI) && !IsJMP && !isSafeToSpeculate(&MI)) + return false; + // Look for predicate registers defined by this instruction. It's ok + // to speculate such an instruction, but the predicate register cannot + // be used outside of this block (or else it won't be possible to + // update the use of it after predication). PHI uses will be updated + // to use a result of a MUX, and a MUX cannot be created for predicate + // registers. + for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef()) + continue; + unsigned R = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass) + continue; + for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U) + if (U->getParent()->isPHI()) + return false; + } + } + return true; +} + + +bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const { + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isUse()) + continue; + unsigned R = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + const MachineInstr *DefI = MRI->getVRegDef(R); + // "Undefined" virtual registers are actually defined via IMPLICIT_DEF. + assert(DefI && "Expecting a reaching def in MRI"); + if (DefI->isImplicitDef()) + return true; + } + return false; +} + + +bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const { + if (hasEHLabel(FP.SplitB)) // KLUDGE: see function definition + return false; + if (FP.TrueB && !isValidCandidate(FP.TrueB)) + return false; + if (FP.FalseB && !isValidCandidate(FP.FalseB)) + return false; + // Check the PHIs in the join block. If any of them use a register + // that is defined as IMPLICIT_DEF, do not convert this. This can + // legitimately happen if one side of the split never executes, but + // the compiler is unable to prove it. That side may then seem to + // provide an "undef" value to the join block, however it will never + // execute at run-time. If we convert this case, the "undef" will + // be used in a MUX instruction, and that may seem like actually + // using an undefined value to other optimizations. This could lead + // to trouble further down the optimization stream, cause assertions + // to fail, etc. 
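  // A sketch of that hazard in pseudo-MIR (hypothetical registers):
  //
  //   %vreg1<def> = IMPLICIT_DEF
  //   ...
  //   %vreg2<def> = PHI %vreg0, <BB#1>, %vreg1, <BB#2>
  //
  // would be converted into
  //
  //   %vreg2<def> = MUX64_rr %vreg9, %vreg0, %vreg1
  //
  // making the undefined %vreg1 look like a genuine use to later passes.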
+ if (FP.JoinB) { + const MachineBasicBlock &B = *FP.JoinB; + for (auto &MI : B) { + if (!MI.isPHI()) + break; + if (usesUndefVReg(&MI)) + return false; + unsigned DefR = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DefR); + if (RC == &Hexagon::PredRegsRegClass) + return false; + } + } + return true; +} + + +unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const { + assert(B->pred_size() <= 2); + if (B->pred_size() < 2) + return 0; + + unsigned Cost = 0; + MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI(); + for (I = B->begin(); I != E; ++I) { + const MachineOperand &RO1 = I->getOperand(1); + const MachineOperand &RO3 = I->getOperand(3); + assert(RO1.isReg() && RO3.isReg()); + // Must have a MUX if the phi uses a subregister. + if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) { + Cost++; + continue; + } + MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg()); + MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg()); + if (!TII->isPredicable(Def1) || !TII->isPredicable(Def3)) + Cost++; + } + return Cost; +} + + +unsigned HexagonEarlyIfConversion::countPredicateDefs( + const MachineBasicBlock *B) const { + unsigned PredDefs = 0; + for (auto &MI : *B) { + for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef()) + continue; + unsigned R = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass) + PredDefs++; + } + } + return PredDefs; +} + + +bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const { + if (FP.TrueB && FP.FalseB) { + + // Do not if-convert if the branch is one-sided. + if (MBPI) { + BranchProbability Prob(9, 10); + if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob) + return false; + if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob) + return false; + } + + // If both sides are predicable, convert them if they join, and the + // join block has no other predecessors. + MachineBasicBlock *TSB = *FP.TrueB->succ_begin(); + MachineBasicBlock *FSB = *FP.FalseB->succ_begin(); + if (TSB != FSB) + return false; + if (TSB->pred_size() != 2) + return false; + } + + // Calculate the total size of the predicated blocks. + // Assume instruction counts without branches to be the approximation of + // the code size. If the predicated blocks are smaller than a packet size, + // approximate the spare room in the packet that could be filled with the + // predicated/speculated instructions. + unsigned TS = 0, FS = 0, Spare = 0; + if (FP.TrueB) { + TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator()); + if (TS < HEXAGON_PACKET_SIZE) + Spare += HEXAGON_PACKET_SIZE-TS; + } + if (FP.FalseB) { + FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator()); + if (FS < HEXAGON_PACKET_SIZE) + Spare += HEXAGON_PACKET_SIZE-FS; + } + unsigned TotalIn = TS+FS; + DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: " + << TotalIn << ", spare room: " << Spare << "\n"); + if (TotalIn >= SizeLimit+Spare) + return false; + + // Count the number of PHI nodes that will need to be updated (converted + // to MUX). Those can be later converted to predicated instructions, so + // they aren't always adding extra cost. + // KLUDGE: Also, count the number of predicate register definitions in + // each block. The scheduler may increase the pressure of these and cause + // expensive spills (e.g. bitmnp01).
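  // A worked example of the size check above, assuming the default
  // eif-limit of 6 and HEXAGON_PACKET_SIZE of 4: with TS = 2 and FS = 3,
  // TotalIn = 5 and Spare = (4-TS) + (4-FS) = 3, so the candidate survives
  // as long as TotalIn plus the mux count computed below stays under
  // SizeLimit+Spare = 9.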
+ unsigned TotalPh = 0; + unsigned PredDefs = countPredicateDefs(FP.SplitB); + if (FP.JoinB) { + TotalPh = computePhiCost(FP.JoinB); + PredDefs += countPredicateDefs(FP.JoinB); + } else { + if (FP.TrueB && FP.TrueB->succ_size() > 0) { + MachineBasicBlock *SB = *FP.TrueB->succ_begin(); + TotalPh += computePhiCost(SB); + PredDefs += countPredicateDefs(SB); + } + if (FP.FalseB && FP.FalseB->succ_size() > 0) { + MachineBasicBlock *SB = *FP.FalseB->succ_begin(); + TotalPh += computePhiCost(SB); + PredDefs += countPredicateDefs(SB); + } + } + DEBUG(dbgs() << "Total number of extra muxes from converted phis: " + << TotalPh << "\n"); + if (TotalIn+TotalPh >= SizeLimit+Spare) + return false; + + DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n"); + if (PredDefs > 4) + return false; + + return true; +} + + +bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B, + MachineLoop *L) { + bool Changed = false; + + // Visit all dominated blocks from the same loop first, then process B. + MachineDomTreeNode *N = MDT->getNode(B); + typedef GraphTraits<MachineDomTreeNode*> GTN; + // We will change CFG/DT during this traversal, so take precautions to + // avoid problems related to invalidated iterators. In fact, processing + // a child C of B cannot cause another child to be removed, but it can + // cause a new child to be added (which was a child of C before C itself + // was removed). This new child, however, would have been processed + // prior to processing B, so there is no need to process it again. + // Simply keep a list of children of B, and traverse that list. + typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType; + DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N)); + for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + if (!Deleted.count(SB)) + Changed |= visitBlock(SB, L); + } + // When walking down the dominator tree, we want to traverse through + // blocks from nested (other) loops, because they can dominate blocks + // that are in L. Skip the non-L blocks only after the tree traversal. + if (MLI->getLoopFor(B) != L) + return Changed; + + FlowPattern FP; + if (!matchFlowPattern(B, L, FP)) + return Changed; + + if (!isValid(FP)) { + DEBUG(dbgs() << "Conversion is not valid\n"); + return Changed; + } + if (!isProfitable(FP)) { + DEBUG(dbgs() << "Conversion is not profitable\n"); + return Changed; + } + + convert(FP); + simplifyFlowGraph(FP); + return true; +} + + +bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) { + MachineBasicBlock *HB = L ? L->getHeader() : 0; + DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB) + : dbgs() << "Visiting function") << "\n"); + bool Changed = false; + if (L) { + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + Changed |= visitLoop(*I); + } + + MachineBasicBlock *EntryB = GraphTraits<MachineFunction*>::getEntryNode(MFN); + Changed |= visitBlock(L ? HB : EntryB, L); + return Changed; +} + + +bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI) + const { + // Exclude post-increment stores. Those return a value, so we cannot + // predicate them.
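  // For example (hypothetical pseudo-MIR), a post-increment store
  //
  //   %vreg1<def> = S2_storerb_pi %vreg0, 1, %vreg2
  //
  // also defines the updated base register %vreg1; predicating it would
  // make that definition conditional, which this SSA-based pass cannot
  // express without introducing a mux for %vreg1.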
+ unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + switch (Opc) { + // Store byte: + case S2_storerb_io: case S4_storerb_rr: + case S2_storerbabs: case S4_storeirb_io: case S2_storerbgp: + // Store halfword: + case S2_storerh_io: case S4_storerh_rr: + case S2_storerhabs: case S4_storeirh_io: case S2_storerhgp: + // Store upper halfword: + case S2_storerf_io: case S4_storerf_rr: + case S2_storerfabs: case S2_storerfgp: + // Store word: + case S2_storeri_io: case S4_storeri_rr: + case S2_storeriabs: case S4_storeiri_io: case S2_storerigp: + // Store doubleword: + case S2_storerd_io: case S4_storerd_rr: + case S2_storerdabs: case S2_storerdgp: + return true; + } + return false; +} + + +bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI) + const { + if (MI->mayLoad() || MI->mayStore()) + return false; + if (MI->isCall() || MI->isBarrier() || MI->isBranch()) + return false; + if (MI->hasUnmodeledSideEffects()) + return false; + + return true; +} + + +unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc, + bool IfTrue) const { + // Exclude post-increment stores. + using namespace Hexagon; + switch (Opc) { + case S2_storerb_io: + return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io; + case S4_storerb_rr: + return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr; + case S2_storerbabs: + case S2_storerbgp: + return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs; + case S4_storeirb_io: + return IfTrue ? S4_storeirbt_io : S4_storeirbf_io; + case S2_storerh_io: + return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io; + case S4_storerh_rr: + return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr; + case S2_storerhabs: + case S2_storerhgp: + return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs; + case S2_storerf_io: + return IfTrue ? S2_pstorerft_io : S2_pstorerff_io; + case S4_storerf_rr: + return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr; + case S2_storerfabs: + case S2_storerfgp: + return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs; + case S4_storeirh_io: + return IfTrue ? S4_storeirht_io : S4_storeirhf_io; + case S2_storeri_io: + return IfTrue ? S2_pstorerit_io : S2_pstorerif_io; + case S4_storeri_rr: + return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr; + case S2_storeriabs: + case S2_storerigp: + return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs; + case S4_storeiri_io: + return IfTrue ? S4_storeirit_io : S4_storeirif_io; + case S2_storerd_io: + return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io; + case S4_storerd_rr: + return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr; + case S2_storerdabs: + case S2_storerdgp: + return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs; + } + llvm_unreachable("Unexpected opcode"); + return 0; +} + + +void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineInstr *MI, + unsigned PredR, bool IfTrue) { + DebugLoc DL; + if (At != ToB->end()) + DL = At->getDebugLoc(); + else if (!ToB->empty()) + DL = ToB->back().getDebugLoc(); + + unsigned Opc = MI->getOpcode(); + + if (isPredicableStore(MI)) { + unsigned COpc = getCondStoreOpcode(Opc, IfTrue); + assert(COpc); + MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc)) + .addReg(PredR); + for (MIOperands MO(MI); MO.isValid(); ++MO) + MIB.addOperand(*MO); + + // Set memory references. 
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + MIB.setMemRefs(MMOBegin, MMOEnd); + + MI->eraseFromParent(); + return; + } + + if (Opc == Hexagon::J2_jump) { + MachineBasicBlock *TB = MI->getOperand(0).getMBB(); + const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt + : Hexagon::J2_jumpf); + BuildMI(*ToB, At, DL, D) + .addReg(PredR) + .addMBB(TB); + MI->eraseFromParent(); + return; + } + + // Print the offending instruction unconditionally as we are about to + // abort. + dbgs() << *MI; + llvm_unreachable("Unexpected instruction"); +} + + +// Predicate/speculate non-branch instructions from FromB into block ToB. +// Leave the branches alone, they will be handled later. Btw, at this point +// FromB should have at most one branch, and it should be unconditional. +void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineBasicBlock *FromB, + unsigned PredR, bool IfTrue) { + DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n"); + MachineBasicBlock::iterator End = FromB->getFirstTerminator(); + MachineBasicBlock::iterator I, NextI; + + for (I = FromB->begin(); I != End; I = NextI) { + assert(!I->isPHI()); + NextI = std::next(I); + if (isSafeToSpeculate(&*I)) + ToB->splice(At, FromB, I); + else + predicateInstr(ToB, At, &*I, PredR, IfTrue); + } +} + + +void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, + const FlowPattern &FP) { + // Visit all PHI nodes in the WhereB block and generate MUX instructions + // in the split block. Update the PHI nodes with the values of the MUX. + auto NonPHI = WhereB->getFirstNonPHI(); + for (auto I = WhereB->begin(); I != NonPHI; ++I) { + MachineInstr *PN = &*I; + // Registers and subregisters corresponding to TrueB, FalseB and SplitB. + unsigned TR = 0, TSR = 0, FR = 0, FSR = 0, SR = 0, SSR = 0; + for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { + const MachineOperand &RO = PN->getOperand(i), &BO = PN->getOperand(i+1); + if (BO.getMBB() == FP.SplitB) + SR = RO.getReg(), SSR = RO.getSubReg(); + else if (BO.getMBB() == FP.TrueB) + TR = RO.getReg(), TSR = RO.getSubReg(); + else if (BO.getMBB() == FP.FalseB) + FR = RO.getReg(), FSR = RO.getSubReg(); + else + continue; + PN->RemoveOperand(i+1); + PN->RemoveOperand(i); + } + if (TR == 0) + TR = SR, TSR = SSR; + else if (FR == 0) + FR = SR, FSR = SSR; + assert(TR && FR); + + using namespace Hexagon; + unsigned DR = PN->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DR); + const MCInstrDesc &D = RC == &IntRegsRegClass ? 
TII->get(C2_mux) + : TII->get(MUX64_rr); + + MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator(); + DebugLoc DL; + if (MuxAt != FP.SplitB->end()) + DL = MuxAt->getDebugLoc(); + unsigned MuxR = MRI->createVirtualRegister(RC); + BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR) + .addReg(FP.PredR) + .addReg(TR, 0, TSR) + .addReg(FR, 0, FSR); + + PN->addOperand(MachineOperand::CreateReg(MuxR, false)); + PN->addOperand(MachineOperand::CreateMBB(FP.SplitB)); + } +} + + +void HexagonEarlyIfConversion::convert(const FlowPattern &FP) { + MachineBasicBlock *TSB = 0, *FSB = 0; + MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator(); + assert(OldTI != FP.SplitB->end()); + DebugLoc DL = OldTI->getDebugLoc(); + + if (FP.TrueB) { + TSB = *FP.TrueB->succ_begin(); + predicateBlockNB(FP.SplitB, OldTI, FP.TrueB, FP.PredR, true); + } + if (FP.FalseB) { + FSB = *FP.FalseB->succ_begin(); + MachineBasicBlock::iterator At = FP.SplitB->getFirstTerminator(); + predicateBlockNB(FP.SplitB, At, FP.FalseB, FP.PredR, false); + } + + // Regenerate new terminators in the split block and update the successors. + // First, remember any information that may be needed later and remove the + // existing terminators/successors from the split block. + MachineBasicBlock *SSB = 0; + FP.SplitB->erase(OldTI, FP.SplitB->end()); + while (FP.SplitB->succ_size() > 0) { + MachineBasicBlock *T = *FP.SplitB->succ_begin(); + // It's possible that the split block had a successor that is not a pre- + // dicated block. This could only happen if there was only one block to + // be predicated. Example: + // split_b: + // if (p) jump true_b + // jump unrelated2_b + // unrelated1_b: + // ... + // unrelated2_b: ; can have other predecessors, so it's not "false_b" + // jump other_b + // true_b: ; only reachable from split_b, can be predicated + // ... + // + // Find this successor (SSB) if it exists. + if (T != FP.TrueB && T != FP.FalseB) { + assert(!SSB); + SSB = T; + } + FP.SplitB->removeSuccessor(FP.SplitB->succ_begin()); + } + + // Insert new branches and update the successors of the split block. This + // may create unconditional branches to the layout successor, etc., but + // that will be cleaned up later. For now, make sure that correct code is + // generated. + if (FP.JoinB) { + assert(!SSB || SSB == FP.JoinB); + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) + .addMBB(FP.JoinB); + FP.SplitB->addSuccessor(FP.JoinB); + } else { + bool HasBranch = false; + if (TSB) { + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt)) + .addReg(FP.PredR) + .addMBB(TSB); + FP.SplitB->addSuccessor(TSB); + HasBranch = true; + } + if (FSB) { + const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump) + : TII->get(Hexagon::J2_jumpf); + MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D); + if (!HasBranch) + MIB.addReg(FP.PredR); + MIB.addMBB(FSB); + FP.SplitB->addSuccessor(FSB); + } + if (SSB) { + // This cannot happen if both TSB and FSB are set. [TF]SB are the + // successor blocks of the TrueB and FalseB (or null if the TrueB + // or FalseB block is null). SSB is the potential successor block + // of the SplitB that is neither TrueB nor FalseB. + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) + .addMBB(SSB); + FP.SplitB->addSuccessor(SSB); + } + } + + // What is left to do is to update the PHI nodes that could have entries + // referring to predicated blocks.
+ if (FP.JoinB) { + updatePhiNodes(FP.JoinB, FP); + } else { + if (TSB) + updatePhiNodes(TSB, FP); + if (FSB) + updatePhiNodes(FSB, FP); + // Nothing to update in SSB, since SSB's predecessors haven't changed. + } +} + + +void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) { + DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n"); + + // Transfer the immediate dominator information from B to its descendants. + MachineDomTreeNode *N = MDT->getNode(B); + MachineDomTreeNode *IDN = N->getIDom(); + if (IDN) { + MachineBasicBlock *IDB = IDN->getBlock(); + typedef GraphTraits<MachineDomTreeNode*> GTN; + typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType; + DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N)); + for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + MDT->changeImmediateDominator(SB, IDB); + } + } + + while (B->succ_size() > 0) + B->removeSuccessor(B->succ_begin()); + + for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I) + (*I)->removeSuccessor(B, true); + + Deleted.insert(B); + MDT->eraseNode(B); + MFN->erase(B->getIterator()); +} + + +void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { + DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n"); + MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI(); + for (I = B->begin(); I != NonPHI; I = NextI) { + NextI = std::next(I); + MachineInstr *PN = &*I; + assert(PN->getNumOperands() == 3 && "Invalid phi node"); + MachineOperand &UO = PN->getOperand(1); + unsigned UseR = UO.getReg(), UseSR = UO.getSubReg(); + unsigned DefR = PN->getOperand(0).getReg(); + unsigned NewR = UseR; + if (UseSR) { + // MRI.replaceVregUsesWith does not allow to update the subregister, + // so instead of doing the use-iteration here, create a copy into a + // "non-subregistered" register. 
+ DebugLoc DL = PN->getDebugLoc(); + const TargetRegisterClass *RC = MRI->getRegClass(DefR); + NewR = MRI->createVirtualRegister(RC); + NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR) + .addReg(UseR, 0, UseSR); + } + MRI->replaceRegWith(DefR, NewR); + B->erase(I); + } +} + + +void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB, + MachineBasicBlock *NewB) { + for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) { + MachineBasicBlock *SB = *I; + MachineBasicBlock::iterator P, N = SB->getFirstNonPHI(); + for (P = SB->begin(); P != N; ++P) { + MachineInstr *PN = &*P; + for (MIOperands MO(PN); MO.isValid(); ++MO) + if (MO->isMBB() && MO->getMBB() == OldB) + MO->setMBB(NewB); + } + } +} + + +void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB, + MachineBasicBlock *SuccB) { + DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and " + << PrintMB(SuccB) << "\n"); + bool TermOk = hasUncondBranch(SuccB); + eliminatePhis(SuccB); + TII->RemoveBranch(*PredB); + PredB->removeSuccessor(SuccB); + PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end()); + MachineBasicBlock::succ_iterator I, E = SuccB->succ_end(); + for (I = SuccB->succ_begin(); I != E; ++I) + PredB->addSuccessor(*I); + PredB->normalizeSuccProbs(); + replacePhiEdges(SuccB, PredB); + removeBlock(SuccB); + if (!TermOk) + PredB->updateTerminator(); +} + + +void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) { + if (FP.TrueB) + removeBlock(FP.TrueB); + if (FP.FalseB) + removeBlock(FP.FalseB); + + FP.SplitB->updateTerminator(); + if (FP.SplitB->succ_size() != 1) + return; + + MachineBasicBlock *SB = *FP.SplitB->succ_begin(); + if (SB->pred_size() != 1) + return; + + // By now, the split block has only one successor (SB), and SB has only + // one predecessor. We can try to merge them. We will need to update ter- + // minators in FP.Split+SB, and that requires working AnalyzeBranch, which + // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends + // with an unconditional branch, we won't need to touch the terminators. + if (!hasEHLabel(SB) || hasUncondBranch(SB)) + mergeBlocks(FP.SplitB, SB); +} + + +bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) { + auto &ST = MF.getSubtarget(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MFN = &MF; + MRI = &MF.getRegInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + MLI = &getAnalysis<MachineLoopInfo>(); + MBPI = EnableHexagonBP ? &getAnalysis<MachineBranchProbabilityInfo>() : + nullptr; + + Deleted.clear(); + bool Changed = false; + + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) + Changed |= visitLoop(*I); + Changed |= visitLoop(0); + + return Changed; +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// +FunctionPass *llvm::createHexagonEarlyIfConversion() { + return new HexagonEarlyIfConversion(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp index e4c8d8f7b28c..6e2dbc06b124 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -74,7 +74,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { // Loop over all of the basic blocks. 
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; // Traverse the basic block. for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); ++MII) { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index 21a8996b1159..7a52a1c9eaec 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -147,6 +147,48 @@ static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX), cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame " "shrink-wraps")); +static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true), + cl::Hidden, cl::desc("Use allocframe more conservatively")); + + +namespace llvm { + void initializeHexagonCallFrameInformationPass(PassRegistry&); + FunctionPass *createHexagonCallFrameInformation(); +} + +namespace { + class HexagonCallFrameInformation : public MachineFunctionPass { + public: + static char ID; + HexagonCallFrameInformation() : MachineFunctionPass(ID) { + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeHexagonCallFrameInformationPass(PR); + } + bool runOnMachineFunction(MachineFunction &MF) override; + }; + + char HexagonCallFrameInformation::ID = 0; +} + +bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) { + auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering(); + bool NeedCFI = MF.getMMI().hasDebugInfo() || + MF.getFunction()->needsUnwindTableEntry(); + + if (!NeedCFI) + return false; + HFI.insertCFIInstructions(MF); + return true; +} + +INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi", + "Hexagon call frame information", false, false) + +FunctionPass *llvm::createHexagonCallFrameInformation() { + return new HexagonCallFrameInformation(); +} + + namespace { /// Map a register pair Reg to the subregister that has the greater "number", /// i.e. D3 (aka R7:6) will be mapped to R7, etc. @@ -370,11 +412,11 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF, insertEpilogueInBlock(*EpilogB); } else { for (auto &B : MF) - if (!B.empty() && B.back().isReturn()) + if (B.isReturnBlock()) insertCSRRestoresInBlock(B, CSI, HRI); for (auto &B : MF) - if (!B.empty() && B.back().isReturn()) + if (B.isReturnBlock()) insertEpilogueInBlock(B); } } @@ -383,10 +425,7 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF, void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget()); - auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); auto &HII = *HST.getInstrInfo(); auto &HRI = *HST.getRegisterInfo(); DebugLoc dl; @@ -405,10 +444,6 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { bool AlignStack = (MaxAlign > getStackAlignment()); - // Check if frame moves are needed for EH. - bool needsFrameMoves = MMI.hasDebugInfo() || - MF.getFunction()->needsUnwindTableEntry(); - // Get the number of bytes to allocate from the FrameInfo. 
unsigned NumBytes = MFI->getStackSize(); unsigned SP = HRI.getStackRegister(); @@ -424,14 +459,7 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { MI->eraseFromParent(); } - // - // Only insert ALLOCFRAME if we need to or at -O0 for the debugger. Think - // that this shouldn't be required, but doing so now because gcc does and - // gdb can't break at the start of the function without it. Will remove if - // this turns out to be a gdb bug. - // - bool NoOpt = (HTM.getOptLevel() == CodeGenOpt::None); - if (!NoOpt && !FuncInfo->hasClobberLR() && !hasFP(MF)) + if (!hasFP(MF)) return; // Check for overflow. @@ -469,92 +497,11 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { .addReg(SP) .addImm(-int64_t(MaxAlign)); } - - if (needsFrameMoves) { - std::vector<MCCFIInstruction> Instructions = MMI.getFrameInstructions(); - MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); - - // Advance CFA. DW_CFA_def_cfa - unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true); - unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true); - - // CFA = FP + 8 - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( - FrameLabel, DwFPReg, -8)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // R31 (return addr) = CFA - #4 - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - FrameLabel, DwRAReg, -4)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // R30 (frame ptr) = CFA - #8) - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - FrameLabel, DwFPReg, -8)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - unsigned int regsToMove[] = { - Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2, - Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18, - Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22, - Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26, - Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, Hexagon::D10, - Hexagon::D11, Hexagon::D12, Hexagon::D13, Hexagon::NoRegister - }; - - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - - for (unsigned i = 0; regsToMove[i] != Hexagon::NoRegister; ++i) { - for (unsigned I = 0, E = CSI.size(); I < E; ++I) { - if (CSI[I].getReg() == regsToMove[i]) { - // Subtract 8 to make room for R30 and R31, which are added above. - int64_t Offset = getFrameIndexOffset(MF, CSI[I].getFrameIdx()) - 8; - - if (regsToMove[i] < Hexagon::D0 || regsToMove[i] > Hexagon::D15) { - unsigned DwarfReg = HRI.getDwarfRegNum(regsToMove[i], true); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, - DwarfReg, Offset)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } else { - // Split the double regs into subregs, and generate appropriate - // cfi_offsets. - // The only reason, we are split double regs is, llvm-mc does not - // understand paired registers for cfi_offset. 
- // Eg .cfi_offset r1:0, -64 - unsigned HiReg = getMax32BitSubRegister(regsToMove[i], HRI); - unsigned LoReg = getMax32BitSubRegister(regsToMove[i], HRI, false); - unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true); - unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true); - unsigned HiCFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, - HiDwarfReg, Offset+4)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(HiCFIIndex); - unsigned LoCFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, - LoDwarfReg, Offset)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(LoCFIIndex); - } - break; - } - } // for CSI.size() - } // for regsToMove - } // needsFrameMoves } void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { MachineFunction &MF = *MBB.getParent(); - // - // Only insert deallocframe if we need to. Also at -O0. See comment - // in insertPrologueInBlock above. - // - if (!hasFP(MF) && MF.getTarget().getOptLevel() != CodeGenOpt::None) + if (!hasFP(MF)) return; auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); @@ -630,12 +577,172 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { } +namespace { + bool IsAllocFrame(MachineBasicBlock::const_iterator It) { + if (!It->isBundle()) + return It->getOpcode() == Hexagon::S2_allocframe; + auto End = It->getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator I = It.getInstrIterator(); + while (++I != End && I->isBundled()) + if (I->getOpcode() == Hexagon::S2_allocframe) + return true; + return false; + } + + MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) { + for (auto &I : B) + if (IsAllocFrame(I)) + return I; + return B.end(); + } +} + + +void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { + for (auto &B : MF) { + auto AF = FindAllocFrame(B); + if (AF == B.end()) + continue; + insertCFIInstructionsAt(B, ++AF); + } +} + + +void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator At) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + + // If CFI instructions have debug information attached, something goes + // wrong with the final assembly generation: the prolog_end is placed + // in a wrong location. + DebugLoc DL; + const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION); + + MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); + + if (hasFP(MF)) { + unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true); + unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true); + + // Define CFA via an offset from the value of FP. + // + // -8 -4 0 (SP) + // --+----+----+--------------------- + // | FP | LR | increasing addresses --> + // --+----+----+--------------------- + // | +-- Old SP (before allocframe) + // +-- New FP (after allocframe) + // + // MCCFIInstruction::createDefCfa subtracts the offset from the register. + // MCCFIInstruction::createOffset takes the offset without sign change. 
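  // In assembly terms, the three directives built below amount to the
  // following (a sketch, with r30 = FP and r31 = LR):
  //
  //   .cfi_def_cfa r30, 8      // CFA = FP + 8; createDefCfa negates the -8
  //   .cfi_offset r31, -4      // LR saved at CFA - 4
  //   .cfi_offset r30, -8      // old FP saved at CFA - 8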
+ auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(DefCfa)); + // R31 (return addr) = CFA - 4 + auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffR31)); + // R30 (frame ptr) = CFA - 8 + auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffR30)); + } + + static unsigned int RegsToMove[] = { + Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2, + Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18, + Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22, + Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26, + Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, + Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13, + Hexagon::NoRegister + }; + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) { + unsigned Reg = RegsToMove[i]; + auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool { + return C.getReg() == Reg; + }; + auto F = std::find_if(CSI.begin(), CSI.end(), IfR); + if (F == CSI.end()) + continue; + + // Subtract 8 to make room for R30 and R31, which are added above. + unsigned FrameReg; + int64_t Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg) - 8; + + if (Reg < Hexagon::D0 || Reg > Hexagon::D15) { + unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true); + auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg, + Offset); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffReg)); + } else { + // Split the double regs into subregs, and generate appropriate + // cfi_offsets. + // The only reason we split double regs is that llvm-mc does not + // understand paired registers for cfi_offset. + // Eg .cfi_offset r1:0, -64 + + unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg); + unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg); + unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true); + unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true); + auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg, + Offset+4); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffHi)); + auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg, + Offset); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffLo)); + } + } +} + + bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const HexagonMachineFunctionInfo *FuncInfo = - MF.getInfo<HexagonMachineFunctionInfo>(); - return MFI->hasCalls() || MFI->getStackSize() > 0 || - FuncInfo->hasClobberLR(); + auto &MFI = *MF.getFrameInfo(); + auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + + bool HasFixed = MFI.getNumFixedObjects(); + bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI) + .getLocalFrameObjectCount(); + bool HasExtraAlign = HRI.needsStackRealignment(MF); + bool HasAlloca = MFI.hasVarSizedObjects(); + + // Insert ALLOCFRAME if we need to or at -O0 for the debugger. Think + // that this shouldn't be required, but doing so now because gcc does and + // gdb can't break at the start of the function without it. Will remove if + // this turns out to be a gdb bug. + // + if (MF.getTarget().getOptLevel() == CodeGenOpt::None) + return true; + + // By default we want to use SP (since it's always there). FP requires + // some setup (i.e.
ALLOCFRAME). + // Fixed and preallocated objects need FP if the distance from them to + // the SP is unknown (as is with alloca or aligna). + if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign)) + return true; + + if (MFI.getStackSize() > 0) { + if (UseAllocframe) + return true; + } + + if (MFI.hasCalls() || + MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR()) + return true; + + return false; } @@ -718,9 +825,89 @@ static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst, } -int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - return MF.getFrameInfo()->getObjectOffset(FI); +int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + auto &MFI = *MF.getFrameInfo(); + auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + + // Large parts of this code are shared with HRI::eliminateFrameIndex. + int Offset = MFI.getObjectOffset(FI); + bool HasAlloca = MFI.hasVarSizedObjects(); + bool HasExtraAlign = HRI.needsStackRealignment(MF); + bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None; + + unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister(); + unsigned AP = 0; + if (const MachineInstr *AI = getAlignaInstr(MF)) + AP = AI->getOperand(0).getReg(); + unsigned FrameSize = MFI.getStackSize(); + + bool UseFP = false, UseAP = false; // Default: use SP (except at -O0). + // Use FP at -O0, except when there are objects with extra alignment. + // That additional alignment requirement may cause a pad to be inserted, + // which will make it impossible to use FP to access objects located + // past the pad. + if (NoOpt && !HasExtraAlign) + UseFP = true; + if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) { + // Fixed and preallocated objects will be located before any padding + // so FP must be used to access them. + UseFP |= (HasAlloca || HasExtraAlign); + } else { + if (HasAlloca) { + if (HasExtraAlign) + UseAP = true; + else + UseFP = true; + } + } + + // If FP was picked, then there had better be FP. + bool HasFP = hasFP(MF); + assert((HasFP || !UseFP) && "This function must have frame pointer"); + + // Having FP implies allocframe. Allocframe will store extra 8 bytes: + // FP/LR. If the base register is used to access an object across these + // 8 bytes, then the offset will need to be adjusted by 8. + // + // After allocframe: + // HexagonISelLowering adds 8 to ---+ + // the offsets of all stack-based | + // arguments (*) | + // | + // getObjectOffset < 0 0 8 getObjectOffset >= 8 + // ------------------------+-----+------------------------> increasing + // <local objects> |FP/LR| <input arguments> addresses + // -----------------+------+-----+------------------------> + // | | + // SP/AP point --+ +-- FP points here (**) + // somewhere on + // this side of FP/LR + // + // (*) See LowerFormalArguments. The FP/LR is assumed to be present. + // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR. + + // The lowering assumes that FP/LR is present, and so the offsets of + // the formal arguments start at 8. If FP/LR is not there we need to + // reduce the offset by 8. + if (Offset > 0 && !HasFP) + Offset -= 8; + + if (UseFP) + FrameReg = FP; + else if (UseAP) + FrameReg = AP; + else + FrameReg = SP; + + // Calculate the actual offset in the instruction. If there is no FP + // (in other words, no allocframe), then SP will not be adjusted (i.e. 
+ // there will be no SP -= FrameSize), so the frame size should not be + // added to the calculated offset. + int RealOffset = Offset; + if (!UseFP && !UseAP && HasFP) + RealOffset = FrameSize+Offset; + return RealOffset; } @@ -731,7 +918,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI = MBB.begin(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); if (useSpillFunction(MF, CSI)) { unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI); @@ -739,7 +926,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, // Call spill function. DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); MachineInstr *SaveRegsCall = - BuildMI(MBB, MI, DL, TII.get(Hexagon::SAVE_REGISTERS_CALL_V4)) + BuildMI(MBB, MI, DL, HII.get(Hexagon::SAVE_REGISTERS_CALL_V4)) .addExternalSymbol(SpillFun); // Add callee-saved registers as use. addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false); @@ -757,7 +944,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg); int FI = CSI[i].getFrameIdx(); const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI); + HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI); if (IsKill) MBB.addLiveIn(Reg); } @@ -772,7 +959,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); if (useRestoreFunction(MF, CSI)) { bool HasTC = hasTailCall(MBB) || !hasReturn(MBB); @@ -787,14 +974,14 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, if (HasTC) { unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4; - DeallocCall = BuildMI(MBB, MI, DL, TII.get(ROpc)) + DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc)) .addExternalSymbol(RestoreFn); } else { // The block has a return. MachineBasicBlock::iterator It = MBB.getFirstTerminator(); assert(It->isReturn() && std::next(It) == MBB.end()); unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4; - DeallocCall = BuildMI(MBB, It, DL, TII.get(ROpc)) + DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc)) .addExternalSymbol(RestoreFn); // Transfer the function live-out registers. DeallocCall->copyImplicitOps(MF, It); @@ -807,7 +994,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); int FI = CSI[i].getFrameIdx(); - TII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); + HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); } return true; } @@ -832,9 +1019,9 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized( // via AP, which may not be available at the particular place in the program. 
MachineFrameInfo *MFI = MF.getFrameInfo(); bool HasAlloca = MFI->hasVarSizedObjects(); - bool HasAligna = (MFI->getMaxAlignment() > getStackAlignment()); + bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment()); - if (!HasAlloca || !HasAligna) + if (!HasAlloca || !NeedsAlign) return; unsigned LFS = MFI->getLocalFrameSize(); @@ -864,13 +1051,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF, // Check for an unused caller-saved register. for ( ; *CallerSavedRegs; ++CallerSavedRegs) { MCPhysReg FreeReg = *CallerSavedRegs; - if (MRI.isPhysRegUsed(FreeReg)) + if (!MRI.reg_nodbg_empty(FreeReg)) continue; // Check aliased register usage. bool IsCurrentRegUsed = false; for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI) - if (MRI.isPhysRegUsed(*AI)) { + if (!MRI.reg_nodbg_empty(*AI)) { IsCurrentRegUsed = true; break; } @@ -896,7 +1083,7 @@ bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF) // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; // Traverse the basic block. MachineBasicBlock::iterator NextII; for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); @@ -1210,7 +1397,8 @@ bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const { } -MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const { +const MachineInstr *HexagonFrameLowering::getAlignaInstr( + const MachineFunction &MF) const { for (auto &B : MF) for (auto &I : B) if (I.getOpcode() == Hexagon::ALIGNA) @@ -1219,6 +1407,7 @@ MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const { } +// FIXME: Use Function::optForSize(). 
inline static bool isOptSize(const MachineFunction &MF) { AttributeSet AF = MF.getFunction()->getAttributes(); return AF.hasAttribute(AttributeSet::FunctionIndex, @@ -1226,8 +1415,7 @@ inline static bool isOptSize(const MachineFunction &MF) { } inline static bool isMinSize(const MachineFunction &MF) { - AttributeSet AF = MF.getFunction()->getAttributes(); - return AF.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + return MF.getFunction()->optForMinSize(); } @@ -1289,4 +1477,3 @@ bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF, : SpillFuncThreshold; return Threshold < NumCSI; } - diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h index d39ee2c77195..683b303d43ea 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h @@ -51,7 +51,8 @@ public: bool targetHandlesStackFrameRounding() const override { return true; } - int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; bool hasFP(const MachineFunction &MF) const override; const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) @@ -73,7 +74,9 @@ public: const override; bool needsAligna(const MachineFunction &MF) const; - MachineInstr *getAlignaInstr(MachineFunction &MF) const; + const MachineInstr *getAlignaInstr(const MachineFunction &MF) const; + + void insertCFIInstructions(MachineFunction &MF) const; private: typedef std::vector<CalleeSavedInfo> CSIVect; @@ -86,6 +89,8 @@ private: const HexagonRegisterInfo &HRI) const; bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, const HexagonRegisterInfo &HRI) const; + void insertCFIInstructionsAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator At) const; void adjustForCalleeSavedRegsSpillCall(MachineFunction &MF) const; bool replacePredRegPseudoSpillCode(MachineFunction &MF) const; @@ -94,7 +99,7 @@ private: void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const; - bool shouldInlineCSR(llvm::MachineFunction&, const CSIVect&) const; + bool shouldInlineCSR(llvm::MachineFunction &MF, const CSIVect &CSI) const; bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const; bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const; }; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp index 4d32208bd5aa..f26e2ff764d7 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -195,7 +195,7 @@ bool HexagonGenExtract::convert(Instruction *In) { return false; } - IRBuilder<> IRB(BB, In); + IRBuilder<> IRB(In); Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu : Intrinsic::hexagon_S2_extractup; Module *Mod = BB->getParent()->getParent(); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp index 096da949e77b..64a2b6cec18a 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -77,9 +77,8 @@ namespace { namespace { // Set of virtual registers, based on BitVector. 
struct RegisterSet : private BitVector { - RegisterSet() : BitVector() {} + RegisterSet() = default; explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} - RegisterSet(const RegisterSet &RS) : BitVector(RS) {} using BitVector::clear; @@ -1496,7 +1495,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { // version of DCE that preserves lifetime markers. Without it, merging // of stack objects can fail to recognize and merge disjoint objects // leading to unnecessary stack growth. - Changed |= removeDeadCode(MDT->getRootNode()); + Changed = removeDeadCode(MDT->getRootNode()); const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); BitTracker BTLoc(HE, MF); @@ -1534,7 +1533,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { } if (IFMap.empty()) - return false; + return Changed; { NamedRegionTimer _T("pruning", "hexinsert", TimingDetail); @@ -1547,7 +1546,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { } if (IFMap.empty()) - return false; + return Changed; { NamedRegionTimer _T("selection", "hexinsert", TimingDetail); @@ -1572,13 +1571,15 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, n = Out.size(); i < n; ++i) IFMap.erase(Out[i]); } + if (IFMap.empty()) + return Changed; { NamedRegionTimer _T("generation", "hexinsert", TimingDetail); - Changed = generateInserts(); + generateInserts(); } - return Changed; + return true; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp new file mode 100644 index 000000000000..c059d566709e --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp @@ -0,0 +1,319 @@ +//===--- HexagonGenMux.cpp ------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// During instruction selection, MUX instructions are generated for +// conditional assignments. Since such assignments often present an +// opportunity to predicate instructions, HexagonExpandCondsets +// expands MUXes into pairs of conditional transfers, and then proceeds +// with predication of the producers/consumers of the registers involved. +// This happens after exiting from the SSA form, but before the machine +// instruction scheduler. After the scheduler and after the register +// allocation there can be cases of pairs of conditional transfers +// resulting from a MUX where neither of them was further predicated. If +// these transfers are now placed far enough from the instruction defining +// the predicate register, they cannot use the .new form. In such cases it +// is better to collapse them back to a single MUX instruction. 
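As a hand-written illustration of the collapse described above (not part of the patch; registers are arbitrary), a pair of conditional transfers whose predicate is defined too far away to use the .new form

    p0 = cmp.eq(r1, #0)
    ...
    if (p0) r2 = r3        // A2_tfrt
    if (!p0) r2 = r4       // A2_tfrf

is folded back into a single instruction:

    r2 = mux(p0, r3, r4)   // C2_mux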
+ +#define DEBUG_TYPE "hexmux" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "HexagonTargetMachine.h" + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonGenMux(); + void initializeHexagonGenMuxPass(PassRegistry& Registry); +} + +namespace { + class HexagonGenMux : public MachineFunctionPass { + public: + static char ID; + HexagonGenMux() : MachineFunctionPass(ID), HII(0), HRI(0) { + initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon generate mux instructions"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + + struct CondsetInfo { + unsigned PredR; + unsigned TrueX, FalseX; + CondsetInfo() : PredR(0), TrueX(UINT_MAX), FalseX(UINT_MAX) {} + }; + struct DefUseInfo { + BitVector Defs, Uses; + DefUseInfo() : Defs(), Uses() {} + DefUseInfo(const BitVector &D, const BitVector &U) : Defs(D), Uses(U) {} + }; + struct MuxInfo { + MachineBasicBlock::iterator At; + unsigned DefR, PredR; + MachineOperand *SrcT, *SrcF; + MachineInstr *Def1, *Def2; + MuxInfo(MachineBasicBlock::iterator It, unsigned DR, unsigned PR, + MachineOperand *TOp, MachineOperand *FOp, + MachineInstr *D1, MachineInstr *D2) + : At(It), DefR(DR), PredR(PR), SrcT(TOp), SrcF(FOp), Def1(D1), + Def2(D2) {} + }; + typedef DenseMap<MachineInstr*,unsigned> InstrIndexMap; + typedef DenseMap<unsigned,DefUseInfo> DefUseInfoMap; + typedef SmallVector<MuxInfo,4> MuxInfoList; + + bool isRegPair(unsigned Reg) const { + return Hexagon::DoubleRegsRegClass.contains(Reg); + } + void getSubRegs(unsigned Reg, BitVector &SRs) const; + void expandReg(unsigned Reg, BitVector &Set) const; + void getDefsUses(const MachineInstr *MI, BitVector &Defs, + BitVector &Uses) const; + void buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X, + DefUseInfoMap &DUM); + bool isCondTransfer(unsigned Opc) const; + unsigned getMuxOpcode(const MachineOperand &Src1, + const MachineOperand &Src2) const; + bool genMuxInBlock(MachineBasicBlock &B); + }; + + char HexagonGenMux::ID = 0; +} + +INITIALIZE_PASS(HexagonGenMux, "hexagon-mux", + "Hexagon generate mux instructions", false, false) + + +void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const { + for (MCSubRegIterator I(Reg, HRI); I.isValid(); ++I) + SRs[*I] = true; +} + + +void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const { + if (isRegPair(Reg)) + getSubRegs(Reg, Set); + else + Set[Reg] = true; +} + + +void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs, + BitVector &Uses) const { + // First, get the implicit defs and uses for this instruction. + unsigned Opc = MI->getOpcode(); + const MCInstrDesc &D = HII->get(Opc); + if (const MCPhysReg *R = D.ImplicitDefs) + while (*R) + expandReg(*R++, Defs); + if (const MCPhysReg *R = D.ImplicitUses) + while (*R) + expandReg(*R++, Uses); + + // Look over all operands, and collect explicit defs and uses. + for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) { + if (!Mo->isReg() || Mo->isImplicit()) + continue; + unsigned R = Mo->getReg(); + BitVector &Set = Mo->isDef() ? 
Defs : Uses; + expandReg(R, Set); + } +} + + +void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X, + DefUseInfoMap &DUM) { + unsigned Index = 0; + unsigned NR = HRI->getNumRegs(); + BitVector Defs(NR), Uses(NR); + + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + I2X.insert(std::make_pair(MI, Index)); + Defs.reset(); + Uses.reset(); + getDefsUses(MI, Defs, Uses); + DUM.insert(std::make_pair(Index, DefUseInfo(Defs, Uses))); + Index++; + } +} + + +bool HexagonGenMux::isCondTransfer(unsigned Opc) const { + switch (Opc) { + case Hexagon::A2_tfrt: + case Hexagon::A2_tfrf: + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmoveif: + return true; + } + return false; +} + + +unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1, + const MachineOperand &Src2) const { + bool IsReg1 = Src1.isReg(), IsReg2 = Src2.isReg(); + if (IsReg1) + return IsReg2 ? Hexagon::C2_mux : Hexagon::C2_muxir; + if (IsReg2) + return Hexagon::C2_muxri; + + // Neither is a register. The first source is extendable, but the second + // is not (s8). + if (Src2.isImm() && isInt<8>(Src2.getImm())) + return Hexagon::C2_muxii; + + return 0; +} + + +bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { + bool Changed = false; + InstrIndexMap I2X; + DefUseInfoMap DUM; + buildMaps(B, I2X, DUM); + + typedef DenseMap<unsigned,CondsetInfo> CondsetMap; + CondsetMap CM; + MuxInfoList ML; + + MachineBasicBlock::iterator NextI, End = B.end(); + for (MachineBasicBlock::iterator I = B.begin(); I != End; I = NextI) { + MachineInstr *MI = &*I; + NextI = std::next(I); + unsigned Opc = MI->getOpcode(); + if (!isCondTransfer(Opc)) + continue; + unsigned DR = MI->getOperand(0).getReg(); + if (isRegPair(DR)) + continue; + + unsigned PR = MI->getOperand(1).getReg(); + unsigned Idx = I2X.lookup(MI); + CondsetMap::iterator F = CM.find(DR); + bool IfTrue = HII->isPredicatedTrue(Opc); + + // If there is no record of a conditional transfer for this register, + // or the predicate register differs, create a new record for it. + if (F != CM.end() && F->second.PredR != PR) { + CM.erase(F); + F = CM.end(); + } + if (F == CM.end()) { + auto It = CM.insert(std::make_pair(DR, CondsetInfo())); + F = It.first; + F->second.PredR = PR; + } + CondsetInfo &CI = F->second; + if (IfTrue) + CI.TrueX = Idx; + else + CI.FalseX = Idx; + if (CI.TrueX == UINT_MAX || CI.FalseX == UINT_MAX) + continue; + + // There is now a complete definition of DR, i.e. we have the predicate + // register, the definition if-true, and definition if-false. + + // First, check if both definitions are far enough from the definition + // of the predicate register. + unsigned MinX = std::min(CI.TrueX, CI.FalseX); + unsigned MaxX = std::max(CI.TrueX, CI.FalseX); + unsigned SearchX = (MaxX > 4) ? MaxX-4 : 0; + bool NearDef = false; + for (unsigned X = SearchX; X < MaxX; ++X) { + const DefUseInfo &DU = DUM.lookup(X); + if (!DU.Defs[PR]) + continue; + NearDef = true; + break; + } + if (NearDef) + continue; + + // The predicate register is not defined in the last few instructions. + // Check if the conversion to MUX is possible (either "up", i.e. at the + // place of the earlier partial definition, or "down", where the later + // definition is located). Examine all defs and uses between these two + // definitions. + // SR1, SR2 - source registers from the first and the second definition. 
+ MachineBasicBlock::iterator It1 = B.begin(), It2 = B.begin(); + std::advance(It1, MinX); + std::advance(It2, MaxX); + MachineInstr *Def1 = It1, *Def2 = It2; + MachineOperand *Src1 = &Def1->getOperand(2), *Src2 = &Def2->getOperand(2); + unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0; + unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0; + bool Failure = false, CanUp = true, CanDown = true; + for (unsigned X = MinX+1; X < MaxX; X++) { + const DefUseInfo &DU = DUM.lookup(X); + if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) { + Failure = true; + break; + } + if (CanDown && DU.Defs[SR1]) + CanDown = false; + if (CanUp && DU.Defs[SR2]) + CanUp = false; + } + if (Failure || (!CanUp && !CanDown)) + continue; + + MachineOperand *SrcT = (MinX == CI.TrueX) ? Src1 : Src2; + MachineOperand *SrcF = (MinX == CI.FalseX) ? Src1 : Src2; + // Prefer "down", since this will move the MUX farther away from the + // predicate definition. + MachineBasicBlock::iterator At = CanDown ? Def2 : Def1; + ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2)); + } + + for (unsigned I = 0, N = ML.size(); I < N; ++I) { + MuxInfo &MX = ML[I]; + MachineBasicBlock &B = *MX.At->getParent(); + DebugLoc DL = MX.At->getDebugLoc(); + unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF); + if (!MxOpc) + continue; + BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR) + .addReg(MX.PredR) + .addOperand(*MX.SrcT) + .addOperand(*MX.SrcF); + B.erase(MX.Def1); + B.erase(MX.Def2); + Changed = true; + } + + return Changed; +} + +bool HexagonGenMux::runOnMachineFunction(MachineFunction &MF) { + HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + bool Changed = false; + for (auto &I : MF) + Changed |= genMuxInBlock(I); + return Changed; +} + +FunctionPass *llvm::createHexagonGenMux() { + return new HexagonGenMux(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp index 6905c4f6d125..d9675b5173d2 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -250,7 +250,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) { unsigned NewPR = MRI->createVirtualRegister(PredRC); // For convertible instructions, do not modify them, so that they can - // be coverted later. Generate a copy from Reg to NewPR. + // be converted later. Generate a copy from Reg to NewPR. if (isConvertibleToPredForm(DefI)) { MachineBasicBlock::iterator DefIt = DefI; BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR) diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 53b6bf617e8f..d20a809d6c09 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -727,9 +727,9 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, // Phis that may feed into the loop. LoopFeederMap LoopFeederPhi; - // Check if the inital value may be zero and can be decremented in the first + // Check if the initial value may be zero and can be decremented in the first // iteration. If the value is zero, the endloop instruction will not decrement - // the loop counter, so we shoudn't generate a hardware loop in this case. + // the loop counter, so we shouldn't generate a hardware loop in this case. 
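A minimal example of the hazard being checked for here (hand-written sketch, assuming an unsigned down-counting loop; not code from the patch):

    // If 'start' can be 0, the first decrement wraps to 0xFFFFFFFF and any
    // trip count derived from (start - end) is meaningless, so computeCount()
    // must refuse to emit a hardware loop (loop0/endloop0) for it.
    for (unsigned i = start; i != end; --i)
      body();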
if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop, LoopFeederPhi)) return nullptr; @@ -1288,14 +1288,14 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, typedef MachineBasicBlock::instr_iterator instr_iterator; // Check if things are in order to begin with. - for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I) + for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I) if (&*I == CmpI) return true; // Out of order. unsigned PredR = CmpI->getOperand(0).getReg(); bool FoundBump = false; - instr_iterator CmpIt = CmpI, NextIt = std::next(CmpIt); + instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt); for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) { MachineInstr *In = &*I; for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) { @@ -1307,9 +1307,7 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, } if (In == BumpI) { - instr_iterator After = BumpI; - instr_iterator From = CmpI; - BB->splice(std::next(After), BB, From); + BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator()); FoundBump = true; break; } @@ -1440,7 +1438,7 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow( if (Comparison::isSigned(Cmp)) return false; - // Check if there is a comparison of the inital value. If the initial value + // Check if there is a comparison of the initial value. If the initial value // is greater than or not equal to another value, then assume this is a // range check. if ((Cmp & Comparison::G) || Cmp == Comparison::NE) @@ -1850,7 +1848,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( } MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); - MF->insert(Header, NewPH); + MF->insert(Header->getIterator(), NewPH); if (Header->pred_size() > 2) { // Ensure that the header has only two predecessors: the preheader and diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 9123057e60d1..a0da945e7572 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -50,16 +50,21 @@ namespace { class HexagonDAGToDAGISel : public SelectionDAGISel { const HexagonTargetMachine& HTM; const HexagonSubtarget *HST; + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; public: explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), HTM(tm) { + : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr), + HRI(nullptr) { initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override { // Reset the subtarget each time through. HST = &MF.getSubtarget<HexagonSubtarget>(); + HII = HST->getInstrInfo(); + HRI = HST->getRegisterInfo(); SelectionDAGISel::runOnMachineFunction(MF); return true; } @@ -104,7 +109,6 @@ public: SDNode *SelectConstantFP(SDNode *N); SDNode *SelectAdd(SDNode *N); SDNode *SelectBitOp(SDNode *N); - bool isConstExtProfitable(SDNode *N) const; // XformMskToBitPosU5Imm - Returns the bit position which // the single bit 32 bit mask represents. @@ -139,8 +143,8 @@ public: // type i32 where the negative literal is transformed into a positive literal // for use in -= memops. 
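Note on the hunk below: the helper's comment promises a positive literal, but the old body returned the negative immediate unchanged; the fix negates it, so Imm == -5 now yields the target constant 5.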
inline SDValue XformM5ToU5Imm(signed Imm, SDLoc DL) {
- assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
- return CurDAG->getTargetConstant(
- Imm, DL, MVT::i32);
+ assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+ return CurDAG->getTargetConstant(-Imm, DL, MVT::i32);
}

// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
@@ -203,11 +207,10 @@ void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {

// Intrinsics that return a predicate.
-static unsigned doesIntrinsicReturnPredicate(unsigned ID)
-{
+static bool doesIntrinsicReturnPredicate(unsigned ID) {
switch (ID) {
default:
- return 0;
+ return false;
case Intrinsic::hexagon_C2_cmpeq:
case Intrinsic::hexagon_C2_cmpgt:
case Intrinsic::hexagon_C2_cmpgtu:
@@ -244,7 +247,7 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID)
case Intrinsic::hexagon_C2_tfrrp:
case Intrinsic::hexagon_S2_tstbit_i:
case Intrinsic::hexagon_S2_tstbit_r:
- return 1;
+ return true;
}
}

@@ -258,8 +261,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();

- const HexagonInstrInfo &TII = *HST->getInstrInfo();
- if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConst = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
MVT::Other, Base, TargetConst,
@@ -312,8 +314,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();

- const HexagonInstrInfo &TII = *HST->getInstrInfo();
- if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
@@ -378,29 +379,46 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
// loads.
ISD::LoadExtType ExtType = LD->getExtensionType();
bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
+ bool HasVecOffset = false;

// Figure out the opcode.
- const HexagonInstrInfo &TII = *HST->getInstrInfo();
if (LoadedVT == MVT::i64) {
- if (TII.isValidAutoIncImm(LoadedVT, Val))
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadrd_pi;
else
Opcode = Hexagon::L2_loadrd_io;
} else if (LoadedVT == MVT::i32) {
- if (TII.isValidAutoIncImm(LoadedVT, Val))
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadri_pi;
else
Opcode = Hexagon::L2_loadri_io;
} else if (LoadedVT == MVT::i16) {
- if (TII.isValidAutoIncImm(LoadedVT, Val))
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
else
Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
} else if (LoadedVT == MVT::i8) {
- if (TII.isValidAutoIncImm(LoadedVT, Val))
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
else
Opcode = IsZeroExt ?
Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io; + } else if (LoadedVT == MVT::v16i32 || LoadedVT == MVT::v8i64 || + LoadedVT == MVT::v32i16 || LoadedVT == MVT::v64i8) { + HasVecOffset = true; + if (HII->isValidAutoIncImm(LoadedVT, Val)) { + Opcode = Hexagon::V6_vL32b_pi; + } + else + Opcode = Hexagon::V6_vL32b_ai; + // 128B + } else if (LoadedVT == MVT::v32i32 || LoadedVT == MVT::v16i64 || + LoadedVT == MVT::v64i16 || LoadedVT == MVT::v128i8) { + HasVecOffset = true; + if (HII->isValidAutoIncImm(LoadedVT, Val)) { + Opcode = Hexagon::V6_vL32b_pi_128B; + } + else + Opcode = Hexagon::V6_vL32b_ai_128B; } else llvm_unreachable("unknown memory type"); @@ -411,7 +429,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD) return SelectIndexedLoadSignExtend64(LD, Opcode, dl); - if (TII.isValidAutoIncImm(LoadedVT, Val)) { + if (HII->isValidAutoIncImm(LoadedVT, Val)) { SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); SDNode* Result = CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), @@ -420,15 +438,25 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = LD->getMemOperand(); cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result, 0), - SDValue(Result, 1), - SDValue(Result, 2) - }; - ReplaceUses(Froms, Tos, 3); + if (HasVecOffset) { + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Result, 2) + }; + ReplaceUses(Froms, Tos, 2); + } else { + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Result, 1), + SDValue(Result, 2) + }; + ReplaceUses(Froms, Tos, 3); + } return Result; } else { SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); @@ -487,8 +515,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { // Offset value must be within representable range // and must have correct alignment properties. - const HexagonInstrInfo &TII = *HST->getInstrInfo(); - if (TII.isValidAutoIncImm(StoredVT, Val)) { + if (HII->isValidAutoIncImm(StoredVT, Val)) { unsigned Opcode = 0; // Figure out the post inc version of opcode. 
@@ -496,7 +523,15 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_pi; else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_pi; else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi; - else llvm_unreachable("unknown memory type"); + else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 || + StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) { + Opcode = Hexagon::V6_vS32b_pi; + } + // 128B + else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 || + StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) { + Opcode = Hexagon::V6_vS32b_pi_128B; + } else llvm_unreachable("unknown memory type"); if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) { assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store"); @@ -530,6 +565,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io; else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io; else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io; + else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 || + StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) + Opcode = Hexagon::V6_vS32b_ai; + // 128B + else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 || + StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) + Opcode = Hexagon::V6_vS32b_ai_128B; else llvm_unreachable("unknown memory type"); // Build regular store. @@ -1113,14 +1155,12 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { } if (Opc == ISD::AND) { - if (((ValueVT == MVT::i32) && - (!((Val & 0x80000000) || (Val & 0x7fffffff)))) || - ((ValueVT == MVT::i64) && - (!((Val & 0x8000000000000000) || (Val & 0x7fffffff))))) - // If it's simple AND, do the normal op. - return SelectCode(N); - else + // Check if this is a bit-clearing AND, if not select code the usual way. + if ((ValueVT == MVT::i32 && isPowerOf2_32(~Val)) || + (ValueVT == MVT::i64 && isPowerOf2_64(~Val))) Val = ~Val; + else + return SelectCode(N); } // If OR or AND is being fed by shl, srl and, sra don't do this change, @@ -1128,7 +1168,8 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { // Traverse the DAG to see if there is shl, srl and sra. if (Opc == ISD::OR || Opc == ISD::AND) { switch (N->getOperand(0)->getOpcode()) { - default: break; + default: + break; case ISD::SRA: case ISD::SRL: case ISD::SHL: @@ -1137,23 +1178,24 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { } // Make sure it's power of 2. - unsigned bitpos = 0; + unsigned BitPos = 0; if (Opc != ISD::FABS && Opc != ISD::FNEG) { - if (((ValueVT == MVT::i32) && !isPowerOf2_32(Val)) || - ((ValueVT == MVT::i64) && !isPowerOf2_64(Val))) + if ((ValueVT == MVT::i32 && !isPowerOf2_32(Val)) || + (ValueVT == MVT::i64 && !isPowerOf2_64(Val))) return SelectCode(N); // Get the bit position. - bitpos = countTrailingZeros(uint64_t(Val)); + BitPos = countTrailingZeros(uint64_t(Val)); } else { // For fabs and fneg, it's always the 31st bit. - bitpos = 31; + BitPos = 31; } unsigned BitOpc = 0; // Set the right opcode for bitwise operations. - switch(Opc) { - default: llvm_unreachable("Only bit-wise/abs/neg operations are allowed."); + switch (Opc) { + default: + llvm_unreachable("Only bit-wise/abs/neg operations are allowed."); case ISD::AND: case ISD::FABS: BitOpc = Hexagon::S2_clrbit_i; @@ -1169,7 +1211,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { SDNode *Result; // Get the right SDVal for the opcode. 
- SDValue SDVal = CurDAG->getTargetConstant(bitpos, dl, MVT::i32); + SDValue SDVal = CurDAG->getTargetConstant(BitPos, dl, MVT::i32); if (ValueVT == MVT::i32 || ValueVT == MVT::f32) { Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT, @@ -1198,7 +1240,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { MVT::i32, SDValue(Reg, 0)); // Clear/set/toggle hi or lo registers depending on the bit position. - if (SubValueVT != MVT::f32 && bitpos < 32) { + if (SubValueVT != MVT::f32 && BitPos < 32) { SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, SubregLO, SDVal); const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx, @@ -1207,7 +1249,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { dl, ValueVT, Ops); } else { if (Opc != ISD::FABS && Opc != ISD::FNEG) - SDVal = CurDAG->getTargetConstant(bitpos - 32, dl, MVT::i32); + SDVal = CurDAG->getTargetConstant(BitPos-32, dl, MVT::i32); SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, SubregHI, SDVal); const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx, @@ -1328,25 +1370,12 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, return false; } -bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const { - unsigned UseCount = 0; - unsigned CallCount = 0; - for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { - // Ignore call instructions. - if (I->getOpcode() == ISD::CopyToReg) - ++CallCount; - UseCount++; - } - - return (UseCount <= 1) || (CallCount > 1); - -} void HexagonDAGToDAGISel::PreprocessISelDAG() { SelectionDAG &DAG = *CurDAG; std::vector<SDNode*> Nodes; - for (auto I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) - Nodes.push_back(I); + for (SDNode &Node : DAG.allnodes()) + Nodes.push_back(&Node); // Simplify: (or (select c x 0) z) -> (select c (or x z) z) // (or (select c 0 y) z) -> (select c z (or y z)) @@ -1397,11 +1426,10 @@ void HexagonDAGToDAGISel::EmitFunctionEntryCode() { return; MachineFrameInfo *MFI = MF->getFrameInfo(); - MachineBasicBlock *EntryBB = MF->begin(); + MachineBasicBlock *EntryBB = &MF->front(); unsigned AR = FuncInfo->CreateReg(MVT::i32); unsigned MaxA = MFI->getMaxAlignment(); - auto &HII = *HST.getInstrInfo(); - BuildMI(EntryBB, DebugLoc(), HII.get(Hexagon::ALIGNA), AR) + BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::ALIGNA), AR) .addImm(MaxA); MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index c739afb70c15..01670902e2b0 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -41,8 +41,8 @@ using namespace llvm; #define DEBUG_TYPE "hexagon-lowering" -static cl::opt<bool> -EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden, +static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables", + cl::init(true), cl::Hidden, cl::desc("Control jump table emission on Hexagon target")); static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched", @@ -98,6 +98,9 @@ public: } // Implement calling convention for Hexagon. 
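For orientation, the vector calling-convention hunks below assign HVX values as follows (a summary read off CC_HexagonVector, not normative documentation):

    mode       single vector                       vector pair                     stack fallback
    64-byte    v64i8/v32i16/v16i32/v8i64, v512i1   v128i8/v64i16/v32i32/v16i64     64- or 128-byte slot
    128-byte   v128i8/v64i16/v32i32/v16i64,        v256i8/v128i16/v64i32/v32i64    128- or 256-byte slot
               v1024i1

Single vectors go to V0..V15 and pairs to W0..W7; anything that does not fit in registers is passed in a suitably aligned stack slot.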
+ +static bool IsHvxVectorType(MVT ty); + static bool CC_Hexagon(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, @@ -114,6 +117,11 @@ CC_Hexagon64(unsigned ValNo, MVT ValVT, ISD::ArgFlagsTy ArgFlags, CCState &State); static bool +CC_HexagonVector(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); @@ -129,6 +137,11 @@ RetCC_Hexagon64(unsigned ValNo, MVT ValVT, ISD::ArgFlagsTy ArgFlags, CCState &State); static bool +RetCC_HexagonVector(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { @@ -169,15 +182,43 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); return false; } + if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 || + LocVT == MVT::v16i8) { + ofst = State.AllocateStack(16, 16); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || + LocVT == MVT::v32i8) { + ofst = State.AllocateStack(32, 32); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || + LocVT == MVT::v64i8 || LocVT == MVT::v512i1) { + ofst = State.AllocateStack(64, 64); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || + LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) { + ofst = State.AllocateStack(128, 128); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 || + LocVT == MVT::v256i8) { + ofst = State.AllocateStack(256, 256); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + llvm_unreachable(nullptr); } -static bool -CC_Hexagon (unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - +static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { if (ArgFlags.isByVal()) { // Passed on stack. unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), @@ -213,6 +254,17 @@ CC_Hexagon (unsigned ValNo, MVT ValVT, return false; } + if (LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) { + unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 32); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + + if (IsHvxVectorType(LocVT)) { + if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + return true; // CC didn't match. 
} @@ -260,10 +312,82 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, return false; } +static bool CC_HexagonVector(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1, + Hexagon::V2, Hexagon::V3, + Hexagon::V4, Hexagon::V5, + Hexagon::V6, Hexagon::V7, + Hexagon::V8, Hexagon::V9, + Hexagon::V10, Hexagon::V11, + Hexagon::V12, Hexagon::V13, + Hexagon::V14, Hexagon::V15}; + static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1, + Hexagon::W2, Hexagon::W3, + Hexagon::W4, Hexagon::W5, + Hexagon::W6, Hexagon::W7}; + auto &MF = State.getMachineFunction(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + bool UseHVX = HST.useHVXOps(); + bool UseHVXDbl = HST.useHVXDblOps(); + + if ((UseHVX && !UseHVXDbl) && + (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || + LocVT == MVT::v64i8 || LocVT == MVT::v512i1)) { + if (unsigned Reg = State.AllocateReg(VecLstS)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + unsigned Offset = State.AllocateStack(64, 64); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + if ((UseHVX && !UseHVXDbl) && + (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || + LocVT == MVT::v128i8)) { + if (unsigned Reg = State.AllocateReg(VecLstD)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + unsigned Offset = State.AllocateStack(128, 128); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + // 128B Mode + if ((UseHVX && UseHVXDbl) && + (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 || + LocVT == MVT::v256i8)) { + if (unsigned Reg = State.AllocateReg(VecLstD)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + unsigned Offset = State.AllocateStack(256, 256); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + if ((UseHVX && UseHVXDbl) && + (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || + LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) { + if (unsigned Reg = State.AllocateReg(VecLstS)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + unsigned Offset = State.AllocateStack(128, 128); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + return true; +} + static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - + auto &MF = State.getMachineFunction(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + bool UseHVX = HST.useHVXOps(); + bool UseHVXDbl = HST.useHVXDblOps(); if (LocVT == MVT::i1 || LocVT == MVT::i8 || @@ -282,8 +406,24 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { LocVT = MVT::i64; LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 || + LocVT == MVT::v16i32 || LocVT == MVT::v8i64 || + LocVT == MVT::v512i1) { + LocVT = MVT::v16i32; + ValVT = MVT::v16i32; + LocInfo = CCValAssign::Full; + } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 || + LocVT == MVT::v32i32 || LocVT == MVT::v16i64 || + (LocVT == MVT::v1024i1 && UseHVX && UseHVXDbl)) { + LocVT = MVT::v32i32; + ValVT = 
MVT::v32i32; + LocInfo = CCValAssign::Full; + } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 || + LocVT == MVT::v64i32 || LocVT == MVT::v32i64) { + LocVT = MVT::v64i32; + ValVT = MVT::v64i32; + LocInfo = CCValAssign::Full; } - if (LocVT == MVT::i32 || LocVT == MVT::f32) { if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) return false; @@ -293,7 +433,10 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) return false; } - + if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) { + if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } return true; // CC didn't match. } @@ -328,6 +471,52 @@ static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT, return false; } +static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + auto &MF = State.getMachineFunction(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + bool UseHVX = HST.useHVXOps(); + bool UseHVXDbl = HST.useHVXDblOps(); + + unsigned OffSiz = 64; + if (LocVT == MVT::v16i32) { + if (unsigned Reg = State.AllocateReg(Hexagon::V0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } else if (LocVT == MVT::v32i32) { + unsigned Req = (UseHVX && UseHVXDbl) ? Hexagon::V0 : Hexagon::W0; + if (unsigned Reg = State.AllocateReg(Req)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + OffSiz = 128; + } else if (LocVT == MVT::v64i32) { + if (unsigned Reg = State.AllocateReg(Hexagon::W0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + OffSiz = 256; + } + + unsigned Offset = State.AllocateStack(OffSiz, OffSiz); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) { + if (VT != PromotedLdStVT) { + setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); + + setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::STORE, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); + } +} + SDValue HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { @@ -351,6 +540,15 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, MachinePointerInfo(), MachinePointerInfo()); } +static bool IsHvxVectorType(MVT ty) { + return (ty == MVT::v8i64 || ty == MVT::v16i32 || ty == MVT::v32i16 || + ty == MVT::v64i8 || + ty == MVT::v16i64 || ty == MVT::v32i32 || ty == MVT::v64i16 || + ty == MVT::v128i8 || + ty == MVT::v32i64 || ty == MVT::v64i32 || ty == MVT::v128i16 || + ty == MVT::v256i8 || + ty == MVT::v512i1 || ty == MVT::v1024i1); +} // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is // passed by value, the function prototype is modified to return void and @@ -463,19 +661,15 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Check for varargs. 
int NumNamedVarArgParams = -1;
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
- {
- const Function* CalleeFn = nullptr;
- Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32);
- if ((CalleeFn = dyn_cast<Function>(GA->getGlobal())))
- {
+ if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = GAN->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+ if (const Function* F = dyn_cast<Function>(GV)) {
// If a function has zero args and is a vararg function, that's
// disallowed so it must be an undeclared function. Do not assume
// varargs if the callee is undefined.
- if (CalleeFn->isVarArg() &&
- CalleeFn->getFunctionType()->getNumParams() != 0) {
- NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams();
- }
+ if (F->isVarArg() && F->getFunctionType()->getNumParams() != 0)
+ NumNamedVarArgParams = F->getFunctionType()->getNumParams();
}
}

@@ -519,11 +713,16 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);

+ bool NeedsArgAlign = false;
+ unsigned LargestAlignSeen = 0;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ // Record if we need > 8 byte alignment on an argument.
+ bool ArgAlign = IsHvxVectorType(VA.getValVT());
+ NeedsArgAlign |= ArgAlign;

// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -549,13 +748,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
StackPtr.getValueType());
MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
+ if (ArgAlign)
+ LargestAlignSeen = std::max(LargestAlignSeen,
+ VA.getLocVT().getStoreSizeInBits() >> 3);
if (Flags.isByVal()) {
// The argument is a struct passed by value. According to LLVM, "Arg"
// is a pointer.
MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
Flags, DAG, dl));
} else {
- MachinePointerInfo LocPI = MachinePointerInfo::getStack(LocMemOffset);
+ MachinePointerInfo LocPI = MachinePointerInfo::getStack(
+ DAG.getMachineFunction(), LocMemOffset);
SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false,
false, 0);
MemOpChains.push_back(S);
@@ -569,6 +772,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
}

+ if (NeedsArgAlign && Subtarget.hasV60TOps()) {
+ DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
+ MachineFrameInfo* MFI = DAG.getMachineFunction().getFrameInfo();
+ // V6 vectors passed by value have 64 or 128 byte alignment depending
+ // on whether we are 64 byte vector mode or 128 byte.
+ bool UseHVXDbl = Subtarget.useHVXDblOps();
+ assert(Subtarget.useHVXOps());
+ const unsigned ObjAlign = UseHVXDbl ? 128 : 64;
+ LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign);
+ MFI->ensureMaxAlignment(LargestAlignSeen);
+ }

// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty()) @@ -613,12 +827,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. - if (flag_aligned_memcpy) { - const char *MemcpyName = - "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; - Callee = DAG.getTargetExternalSymbol(MemcpyName, PtrVT); - flag_aligned_memcpy = false; - } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { @@ -668,7 +877,19 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT, if (Ptr->getOpcode() != ISD::ADD) return false; - if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) { + auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget()); + bool UseHVX = HST.useHVXOps(); + bool UseHVXDbl = HST.useHVXDblOps(); + + bool ValidHVXDblType = + (UseHVX && UseHVXDbl) && (VT == MVT::v32i32 || VT == MVT::v16i64 || + VT == MVT::v64i16 || VT == MVT::v128i8); + bool ValidHVXType = + UseHVX && !UseHVXDbl && (VT == MVT::v16i32 || VT == MVT::v8i64 || + VT == MVT::v32i16 || VT == MVT::v64i8); + + if (ValidHVXDblType || ValidHVXType || + VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) { isInc = (Ptr->getOpcode() == ISD::ADD); Base = Ptr->getOperand(0); Offset = Ptr->getOperand(1); @@ -679,23 +900,6 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT, return false; } -// TODO: Put this function along with the other isS* functions in -// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the -// functions defined in HexagonOperands.td. -static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) { - ConstantSDNode *N = cast<ConstantSDNode>(S); - - // immS4 predicate - True if the immediate fits in a 4-bit sign extended. - // field. - int64_t v = (int64_t)N->getSExtValue(); - int64_t m = 0; - if (ShiftAmount > 0) { - m = v % ShiftAmount; - v = v >> ShiftAmount; - } - return (v <= 7) && (v >= -8) && (m == 0); -} - /// getPostIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if this node can be /// combined with a load / store to form a post-indexed load / store. @@ -724,18 +928,20 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isInc = false; bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); - // ShiftAmount = number of left-shifted bits in the Hexagon instruction. - int ShiftAmount = VT.getSizeInBits() / 16; - if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) { - AM = isInc ? ISD::POST_INC : ISD::POST_DEC; - return true; + if (isLegal) { + auto &HII = *Subtarget.getInstrInfo(); + int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); + if (HII.isValidAutoIncImm(VT, OffsetVal)) { + AM = isInc ? 
ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
}
return false;
}

-SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue
+HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
MachineFunction &MF = DAG.getMachineFunction();
auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
@@ -784,47 +990,6 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
return Op;
}

-
-//
-// Taken from the XCore backend.
-//
-SDValue HexagonTargetLowering::
-LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Chain = Op.getOperand(0);
- SDValue Table = Op.getOperand(1);
- SDValue Index = Op.getOperand(2);
- SDLoc dl(Op);
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
- unsigned JTI = JT->getIndex();
- MachineFunction &MF = DAG.getMachineFunction();
- const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
- SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
-
- // Mark all jump table targets as address taken.
- const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables();
- const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs;
- for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
- MachineBasicBlock *MBB = JTBBs[i];
- MBB->setHasAddressTaken();
- // This line is needed to set the hasAddressTaken flag on the BasicBlock
- // object.
- BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
- }
-
- SDValue JumpTableBase = DAG.getNode(
- HexagonISD::JT, dl, getPointerTy(DAG.getDataLayout()), TargetJT);
- SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
- DAG.getConstant(2, dl, MVT::i32));
- SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
- ShiftIndex);
- SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
- MachinePointerInfo(), false, false, false,
- 0);
- return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget);
-}
-
-
SDValue
HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
@@ -850,7 +1015,10 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue AC = DAG.getConstant(A, dl, MVT::i32);
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
- return DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+ SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+ if (Op.getNode()->getHasDebugValue())
+ DAG.TransferDbgValues(Op, AA);
+ return AA;
}

SDValue
@@ -882,7 +1050,8 @@ const {
// equal to) 8 bytes. If not, no address will be passed into callee and
// the callee returns the result directly through R0/R1.
- SmallVector<SDValue, 4> MemOps; + SmallVector<SDValue, 8> MemOps; + bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -908,6 +1077,42 @@ const { RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); + + // Single Vector + } else if ((RegVT == MVT::v8i64 || RegVT == MVT::v16i32 || + RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) { + unsigned VReg = + RegInfo.createVirtualRegister(&Hexagon::VectorRegsRegClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); + } else if (UseHVX && UseHVXDbl && + ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 || + RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) { + unsigned VReg = + RegInfo.createVirtualRegister(&Hexagon::VectorRegs128BRegClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); + + // Double Vector + } else if ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 || + RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) { + unsigned VReg = + RegInfo.createVirtualRegister(&Hexagon::VecDblRegsRegClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); + } else if (UseHVX && UseHVXDbl && + ((RegVT == MVT::v32i64 || RegVT == MVT::v64i32 || + RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) { + unsigned VReg = + RegInfo.createVirtualRegister(&Hexagon::VecDblRegs128BRegClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); + } else if (RegVT == MVT::v512i1 || RegVT == MVT::v1024i1) { + assert(0 && "need to support VecPred regs"); + unsigned VReg = + RegInfo.createVirtualRegister(&Hexagon::VecPredRegsRegClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); } else { assert (0); } @@ -1056,8 +1261,8 @@ SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) - const { +SDValue +HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue PredOp = Op.getOperand(0); SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2); EVT OpVT = Op1.getValueType(); @@ -1163,16 +1368,33 @@ SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT ValTy = Op.getValueType(); - SDLoc dl(Op); - ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - SDValue Res; - if (CP->isMachineConstantPoolEntry()) - Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), ValTy, - CP->getAlignment()); + ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op); + unsigned Align = CPN->getAlignment(); + Reloc::Model RM = HTM.getRelocationModel(); + unsigned char TF = (RM == Reloc::PIC_) ? 
HexagonII::MO_PCREL : 0; + + SDValue T; + if (CPN->isMachineConstantPoolEntry()) + T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, TF); else - Res = DAG.getTargetConstantPool(CP->getConstVal(), ValTy, - CP->getAlignment()); - return DAG.getNode(HexagonISD::CP, dl, ValTy, Res); + T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, TF); + if (RM == Reloc::PIC_) + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T); + return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T); +} + +SDValue +HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int Idx = cast<JumpTableSDNode>(Op)->getIndex(); + Reloc::Model RM = HTM.getRelocationModel(); + if (RM == Reloc::PIC_) { + SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T); + } + + SDValue T = DAG.getTargetJumpTable(Idx, VT); + return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T); } SDValue @@ -1219,52 +1441,70 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } -SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, - SelectionDAG& DAG) const { +SDValue +HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const { SDLoc dl(Op); return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0)); } -SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, - SelectionDAG &DAG) const { - SDValue Result; - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); +SDValue +HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + auto *GAN = cast<GlobalAddressSDNode>(Op); auto PtrVT = getPointerTy(DAG.getDataLayout()); - Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); + auto *GV = GAN->getGlobal(); + int64_t Offset = GAN->getOffset(); + + auto &HLOF = *HTM.getObjFileLowering(); + Reloc::Model RM = HTM.getRelocationModel(); - const HexagonTargetObjectFile *TLOF = - static_cast<const HexagonTargetObjectFile *>( - getTargetMachine().getObjFileLowering()); - if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) { - return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, Result); + if (RM == Reloc::Static) { + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); + if (HLOF.IsGlobalInSmallSection(GV, HTM)) + return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA); + return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA); } - return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, Result); + bool UsePCRel = GV->hasInternalLinkage() || GV->hasHiddenVisibility() || + (GV->hasLocalLinkage() && !isa<Function>(GV)); + if (UsePCRel) { + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset, + HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA); + } + + // Use GOT index. + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT); + SDValue Off = DAG.getConstant(Offset, dl, MVT::i32); + return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off); } // Specifies that for loads and stores VT can be promoted to PromotedLdStVT. 
-void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) { - if (VT != PromotedLdStVT) { - setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), - PromotedLdStVT.getSimpleVT()); +SDValue +HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + SDLoc dl(Op); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); - setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::STORE, VT.getSimpleVT(), - PromotedLdStVT.getSimpleVT()); + Reloc::Model RM = HTM.getRelocationModel(); + if (RM == Reloc::Static) { + SDValue A = DAG.getTargetBlockAddress(BA, PtrVT); + return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A); } + + SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A); } SDValue -HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - SDValue BA_SD = DAG.getTargetBlockAddress(BA, MVT::i32); - SDLoc dl(Op); - return DAG.getNode(HexagonISD::CONST32_GP, dl, - getPointerTy(DAG.getDataLayout()), BA_SD); +HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) + const { + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT, + HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym); } //===----------------------------------------------------------------------===// @@ -1272,18 +1512,19 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { //===----------------------------------------------------------------------===// HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, - const HexagonSubtarget &STI) + const HexagonSubtarget &ST) : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)), - Subtarget(STI) { + Subtarget(ST) { bool IsV4 = !Subtarget.hasV5TOps(); auto &HRI = *Subtarget.getRegisterInfo(); + bool UseHVX = Subtarget.useHVXOps(); + bool UseHVXSgl = Subtarget.useHVXSglOps(); + bool UseHVXDbl = Subtarget.useHVXDblOps(); setPrefLoopAlignment(4); setPrefFunctionAlignment(4); setMinFunctionAlignment(2); setInsertFencesForAtomic(false); - setExceptionPointerRegister(Hexagon::R0); - setExceptionSelectorRegister(Hexagon::R1); setStackPointerRegisterToSaveRestore(HRI.getStackRegister()); if (EnableHexSDNodeSched) @@ -1320,6 +1561,31 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass); } + if (Subtarget.hasV60TOps()) { + if (Subtarget.useHVXSglOps()) { + addRegisterClass(MVT::v64i8, &Hexagon::VectorRegsRegClass); + addRegisterClass(MVT::v32i16, &Hexagon::VectorRegsRegClass); + addRegisterClass(MVT::v16i32, &Hexagon::VectorRegsRegClass); + addRegisterClass(MVT::v8i64, &Hexagon::VectorRegsRegClass); + addRegisterClass(MVT::v128i8, &Hexagon::VecDblRegsRegClass); + addRegisterClass(MVT::v64i16, &Hexagon::VecDblRegsRegClass); + addRegisterClass(MVT::v32i32, &Hexagon::VecDblRegsRegClass); + addRegisterClass(MVT::v16i64, &Hexagon::VecDblRegsRegClass); + addRegisterClass(MVT::v512i1, &Hexagon::VecPredRegsRegClass); + } else if (Subtarget.useHVXDblOps()) { + addRegisterClass(MVT::v128i8, &Hexagon::VectorRegs128BRegClass); + addRegisterClass(MVT::v64i16, 
&Hexagon::VectorRegs128BRegClass); + addRegisterClass(MVT::v32i32, &Hexagon::VectorRegs128BRegClass); + addRegisterClass(MVT::v16i64, &Hexagon::VectorRegs128BRegClass); + addRegisterClass(MVT::v256i8, &Hexagon::VecDblRegs128BRegClass); + addRegisterClass(MVT::v128i16, &Hexagon::VecDblRegs128BRegClass); + addRegisterClass(MVT::v64i32, &Hexagon::VecDblRegs128BRegClass); + addRegisterClass(MVT::v32i64, &Hexagon::VecDblRegs128BRegClass); + addRegisterClass(MVT::v1024i1, &Hexagon::VecPredRegs128BRegClass); + } + + } + // // Handling of scalar operations. // @@ -1336,10 +1602,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); setOperationAction(ISD::INLINEASM, MVT::Other, Custom); setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); + setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); // Custom legalize GlobalAddress nodes into CONST32. @@ -1361,11 +1629,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); if (EmitJumpTables) - setOperationAction(ISD::BR_JT, MVT::Other, Custom); + setMinimumJumpTableEntries(2); else - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - // Increase jump tables cutover to 5, was 4. - setMinimumJumpTableEntries(MinimumJumpTables); + setMinimumJumpTableEntries(MinimumJumpTables); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); // Hexagon has instructions for add/sub with carry. The problem with // modeling these instructions is that they produce 2 results: Rdd and Px. @@ -1420,9 +1687,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::MULHS, MVT::i64, Expand); for (unsigned IntExpOp : - {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, - ISD::ROTL, ISD::ROTR, ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS, - ISD::SRL_PARTS, ISD::SMUL_LOHI, ISD::UMUL_LOHI}) { + { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, + ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR, + ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS, + ISD::SMUL_LOHI, ISD::UMUL_LOHI }) { setOperationAction(IntExpOp, MVT::i32, Expand); setOperationAction(IntExpOp, MVT::i64, Expand); } @@ -1475,7 +1743,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Set the action for vector operations to "expand", then override it with // either "custom" or "legal" for specific cases. 
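// For illustration only, not from the patch: the addRegisterClass calls
// above key purely off total vector width. In single (64-byte) mode,
// 512-bit types live in VectorRegs and 1024-bit pairs in VecDblRegs; in
// 128-byte mode the same roles fall to VectorRegs128B and VecDblRegs128B
// (predicate types v512i1/v1024i1 aside). A standalone sketch of that
// mapping; hvxClassFor is a hypothetical helper, not LLVM API:

#include <cstdio>

static const char *hvxClassFor(bool DoubleMode, unsigned Bits) {
  unsigned VecBits = DoubleMode ? 1024 : 512; // one native vector
  if (Bits == VecBits)
    return DoubleMode ? "VectorRegs128B" : "VectorRegs";
  if (Bits == 2 * VecBits)                    // a vector pair
    return DoubleMode ? "VecDblRegs128B" : "VecDblRegs";
  return "<not a native HVX width>";
}

int main() {
  std::printf("%s\n", hvxClassFor(false, 512));  // VectorRegs, e.g. v16i32
  std::printf("%s\n", hvxClassFor(true, 2048));  // VecDblRegs128B, e.g. v64i32
}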
- static unsigned VectExpOps[] = { + static const unsigned VectExpOps[] = { // Integer arithmetic: ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC, @@ -1539,7 +1807,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); - + if (UseHVX) { + if (UseHVXSgl) { + setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom); + } else if (UseHVXDbl) { + setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom); + } else { + llvm_unreachable("Unrecognized HVX mode"); + } + } // Subtarget-specific operation actions. // if (Subtarget.hasV5TOps()) { @@ -1586,7 +1868,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, for (ISD::CondCode FPExpCCV4 : {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE, - ISD::SETUO, ISD::SETO}) { + ISD::SETUO, ISD::SETO}) { setCondCodeAction(FPExpCCV4, MVT::f32, Expand); setCondCodeAction(FPExpCCV4, MVT::f64, Expand); } @@ -1599,6 +1881,13 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal); } + if (UseHVXDbl) { + for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) { + setIndexedLoadAction(ISD::POST_INC, VT, Legal); + setIndexedStoreAction(ISD::POST_INC, VT, Legal); + } + } + computeRegisterProperties(&HRI); // @@ -1720,7 +2009,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT"; case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL"; case HexagonISD::BARRIER: return "HexagonISD::BARRIER"; - case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; case HexagonISD::CALLR: return "HexagonISD::CALLR"; case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr"; case HexagonISD::CALLv3: return "HexagonISD::CALLv3"; @@ -1737,7 +2025,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP"; case HexagonISD::JT: return "HexagonISD::JT"; case HexagonISD::PACKHL: return "HexagonISD::PACKHL"; - case HexagonISD::PIC_ADD: return "HexagonISD::PIC_ADD"; case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT"; case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB"; @@ -1754,6 +2041,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ"; case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; + case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB"; @@ -1923,8 +2211,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { unsigned Size = VT.getSizeInBits(); - // A vector larger than 64 
bits cannot be represented in Hexagon. - // Expand will split the vector. + // Only handle vectors of 64 bits or shorter. if (Size > 64) return SDValue(); @@ -2058,58 +2345,61 @@ SDValue HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + bool UseHVX = Subtarget.useHVXOps(); EVT VT = Op.getValueType(); unsigned NElts = Op.getNumOperands(); - SDValue Vec = Op.getOperand(0); - EVT VecVT = Vec.getValueType(); - SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), dl, MVT::i64); - SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, - DAG.getConstant(32, dl, MVT::i64)); - SDValue ConstVal = DAG.getConstant(0, dl, MVT::i64); - - ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width); - ConstantSDNode *S = dyn_cast<ConstantSDNode>(Shifted); - - if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) { - if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) { - // We are trying to concat two v2i16 to a single v4i16. - SDValue Vec0 = Op.getOperand(1); - SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec); - return DAG.getNode(ISD::BITCAST, dl, VT, Combined); + SDValue Vec0 = Op.getOperand(0); + EVT VecVT = Vec0.getValueType(); + unsigned Width = VecVT.getSizeInBits(); + + if (NElts == 2) { + MVT ST = VecVT.getSimpleVT(); + // We are trying to concat two v2i16 to a single v4i16, or two v4i8 + // into a single v8i8. + if (ST == MVT::v2i16 || ST == MVT::v4i8) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0); + + if (UseHVX) { + assert((Width == 64*8 && Subtarget.useHVXSglOps()) || + (Width == 128*8 && Subtarget.useHVXDblOps())); + SDValue Vec1 = Op.getOperand(1); + MVT OpTy = Subtarget.useHVXSglOps() ? MVT::v16i32 : MVT::v32i32; + MVT ReTy = Subtarget.useHVXSglOps() ? MVT::v32i32 : MVT::v64i32; + SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0); + SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1); + SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0); + return DAG.getNode(ISD::BITCAST, dl, VT, VC); } } - if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) { - if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) { - // We are trying to concat two v4i8 to a single v8i8. - SDValue Vec0 = Op.getOperand(1); - SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec); - return DAG.getNode(ISD::BITCAST, dl, VT, Combined); - } - } + if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64) + return SDValue(); + + SDValue C0 = DAG.getConstant(0, dl, MVT::i64); + SDValue C32 = DAG.getConstant(32, dl, MVT::i64); + SDValue W = DAG.getConstant(Width, dl, MVT::i64); + // Create the "width" part of the argument to insert_rp/insertp_rp. 
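  // Worked example, for illustration only (not patch content):
  // concatenating four v2i8 operands (16 bits each) into v8i8 gives
  // Width = 16, so S = 16 << 32. Operand N is inserted at bit offset
  // N * 16, and the third INSERTRP operand packs both fields into one
  // i64: high word holds the field width, low word the bit position.
  // Operand 2 therefore uses (16ULL << 32) | 32.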
+ SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32); + SDValue V = C0; for (unsigned i = 0, e = NElts; i != e; ++i) { - unsigned OpIdx = NElts - i - 1; - SDValue Operand = Op.getOperand(OpIdx); + unsigned N = NElts-i-1; + SDValue OpN = Op.getOperand(N); - if (VT.getSizeInBits() == 64 && - Operand.getValueType().getSizeInBits() == 32) { + if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) { SDValue C = DAG.getConstant(0, dl, MVT::i32); - Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand); + OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN); } - - SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64); - SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width); - SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); - const SDValue Ops[] = {ConstVal, Operand, Combined}; - + SDValue Idx = DAG.getConstant(N, dl, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset); if (VT.getSizeInBits() == 32) - ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops); + V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or}); else - ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops); + V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or}); } - return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal); + return DAG.getNode(ISD::BITCAST, dl, VT, V); } SDValue @@ -2301,6 +2591,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SHL: case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); // Frame & Return address. Currently unimplemented. case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); @@ -2308,8 +2599,8 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); + case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::BR_JT: return LowerBR_JT(Op, DAG); // Custom lower some vector loads. case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); @@ -2321,6 +2612,16 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { } } +/// Returns relocation base for the given PIC jumptable. 
+SDValue +HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + int Idx = cast<JumpTableSDNode>(Table)->getIndex(); + EVT VT = Table.getValueType(); + SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T); +} + MachineBasicBlock * HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) @@ -2343,6 +2644,8 @@ HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, std::pair<unsigned, const TargetRegisterClass *> HexagonTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps(); + if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': // R0-R31 @@ -2358,6 +2661,42 @@ HexagonTargetLowering::getRegForInlineAsmConstraint( case MVT::f64: return std::make_pair(0U, &Hexagon::DoubleRegsRegClass); } + case 'q': // q0-q3 + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::v1024i1: + case MVT::v512i1: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v64i8: + case MVT::v8i64: + return std::make_pair(0U, &Hexagon::VecPredRegsRegClass); + } + case 'v': // V0-V31 + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::v16i32: + case MVT::v32i16: + case MVT::v64i8: + case MVT::v8i64: + return std::make_pair(0U, &Hexagon::VectorRegsRegClass); + case MVT::v32i32: + case MVT::v64i16: + case MVT::v16i64: + case MVT::v128i8: + if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl) + return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass); + else + return std::make_pair(0U, &Hexagon::VecDblRegsRegClass); + case MVT::v256i8: + case MVT::v128i16: + case MVT::v64i32: + case MVT::v32i64: + return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass); + } + default: llvm_unreachable("Unknown asm register class"); } @@ -2397,6 +2736,14 @@ bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL, return true; } +/// Return true if folding a constant offset with the given GlobalAddress is +/// legal. It is frequently not legal in PIC relocation models. +bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) + const { + return HTM.getRelocationModel() == Reloc::Static; +} + + /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can compare /// a register against the immediate without having to materialize the @@ -2428,8 +2775,8 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization( // *************************************************************************** // If this is a tail call via a function pointer, then don't do it! 
- if (!(dyn_cast<GlobalAddressSDNode>(Callee)) - && !(dyn_cast<ExternalSymbolSDNode>(Callee))) { + if (!(isa<GlobalAddressSDNode>(Callee)) && + !(isa<ExternalSymbolSDNode>(Callee))) { return false; } @@ -2467,6 +2814,41 @@ bool llvm::isPositiveHalfWord(SDNode *N) { } } +std::pair<const TargetRegisterClass*, uint8_t> +HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, + MVT VT) const { + const TargetRegisterClass *RRC = nullptr; + + uint8_t Cost = 1; + switch (VT.SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(TRI, VT); + case MVT::v64i8: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v8i64: + RRC = &Hexagon::VectorRegsRegClass; + break; + case MVT::v128i8: + case MVT::v64i16: + case MVT::v32i32: + case MVT::v16i64: + if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() && + Subtarget.useHVXDblOps()) + RRC = &Hexagon::VectorRegs128BRegClass; + else + RRC = &Hexagon::VecDblRegsRegClass; + break; + case MVT::v256i8: + case MVT::v128i16: + case MVT::v64i32: + case MVT::v32i64: + RRC = &Hexagon::VecDblRegs128BRegClass; + break; + } + return std::make_pair(RRC, Cost); +} + Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { BasicBlock *BB = Builder.GetInsertBlock(); @@ -2498,13 +2880,15 @@ Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder, return Ext; } -bool HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { +TargetLowering::AtomicExpansionKind +HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { // Do not expand loads and stores that don't exceed 64 bits. - return LI->getType()->getPrimitiveSizeInBits() > 64; + return LI->getType()->getPrimitiveSizeInBits() > 64 + ? AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; } bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { // Do not expand loads and stores that don't exceed 64 bits. return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64; } - diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 2642abffaddd..bf378b922220 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -35,16 +35,14 @@ bool isPositiveHalfWord(SDNode *N); ALLOCA, ARGEXTEND, - PIC_ADD, - AT_GOT, - AT_PCREL, + AT_GOT, // Index in GOT. + AT_PCREL, // Offset relative to PC. CALLv3, // A V3+ call instruction. CALLv3nr, // A V3+ call instruction that doesn't return. CALLR, RET_FLAG, // Return with a flag operand. - BR_JT, // Branch through jump table. BARRIER, // Memory barrier. JT, // Jump table. CP, // Constant pool. 
@@ -80,6 +78,7 @@ bool isPositiveHalfWord(SDNode *N); INSERTRP, EXTRACTU, EXTRACTURP, + VCOMBINE, TC_RETURN, EH_RETURN, DCFETCH, @@ -127,7 +126,6 @@ bool isPositiveHalfWord(SDNode *N); SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; @@ -137,6 +135,7 @@ bool isPositiveHalfWord(SDNode *N); SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override; SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const override; @@ -163,8 +162,23 @@ bool isPositiveHalfWord(SDNode *N); MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const override; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return Hexagon::R0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return Hexagon::R1; + } + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override { if (!VT.isVector()) @@ -200,6 +214,10 @@ bool isPositiveHalfWord(SDNode *N); /// TODO: Handle pre/postinc as well. bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + /// Return true if folding a constant offset with the given GlobalAddress + /// is legal. It is frequently not legal in PIC relocation models. + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -208,20 +226,26 @@ bool isPositiveHalfWord(SDNode *N); /// the immediate into a register. bool isLegalICmpImmediate(int64_t Imm) const override; + /// Returns relocation base for the given PIC jumptable. + SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) + const override; + // Handling of atomic RMW instructions. 
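// For illustration only, not from the patch: AtomicExpansionKind::LLSC
// (returned by shouldExpandAtomicRMWInIR below) tells the generic pass to
// rewrite atomicrmw as a load-locked/store-conditional retry loop built
// from emitLoadLinked/emitStoreConditional. A conceptual standalone
// sketch of that loop shape, simulating the LL/SC pair with a CAS
// rather than the real IR-level expansion:

#include <atomic>
#include <cstdio>

// One atomicrmw-add step: "load locked", compute, "store conditional",
// retry until the conditional store succeeds.
static int atomicAddLLSC(std::atomic<int> &A, int V) {
  int Old = A.load();
  while (!A.compare_exchange_weak(Old, Old + V)) {
    // compare_exchange_weak reloads Old on failure, like a failed SC.
  }
  return Old;
}

int main() {
  std::atomic<int> A{40};
  std::printf("%d %d\n", atomicAddLLSC(A, 2), A.load()); // 40 42
}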
- bool hasLoadLinkedStoreConditional() const override { - return true; - } Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; - bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - AtomicRMWExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) - const override { - return AtomicRMWExpansionKind::LLSC; + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { + return AtomicExpansionKind::LLSC; } + + protected: + std::pair<const TargetRegisterClass*, uint8_t> + findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) + const override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td new file mode 100644 index 000000000000..5a1a69b40d4d --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td @@ -0,0 +1,462 @@ +//==- HexagonInstrAlias.td - Hexagon Instruction Aliases ---*- tablegen -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Hexagon Instruction Mappings +//===----------------------------------------------------------------------===// + + +def : InstAlias<"memb({GP}+#$addr) = $Nt.new", + (S2_storerbnewgp u16_0Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt.new", + (S2_storerhnewgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memw({GP}+#$addr) = $Nt.new", + (S2_storerinewgp u16_2Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memb({GP}+#$addr) = $Nt", + (S2_storerbgp u16_0Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt", + (S2_storerhgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt.h", + (S2_storerfgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memw({GP}+#$addr) = $Nt", + (S2_storerigp u16_2Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memd({GP}+#$addr) = $Nt", + (S2_storerdgp u16_3Imm:$addr, DoubleRegs:$Nt)>; + +def : InstAlias<"$Nt = memb({GP}+#$addr)", + (L2_loadrbgp IntRegs:$Nt, u16_0Imm:$addr)>; +def : InstAlias<"$Nt = memub({GP}+#$addr)", + (L2_loadrubgp IntRegs:$Nt, u16_0Imm:$addr)>; +def : InstAlias<"$Nt = memh({GP}+#$addr)", + (L2_loadrhgp IntRegs:$Nt, u16_1Imm:$addr)>; +def : InstAlias<"$Nt = memuh({GP}+#$addr)", + (L2_loadruhgp IntRegs:$Nt, u16_1Imm:$addr)>; +def : InstAlias<"$Nt = memw({GP}+#$addr)", + (L2_loadrigp IntRegs:$Nt, u16_2Imm:$addr)>; +def : InstAlias<"$Nt = memd({GP}+#$addr)", + (L2_loadrdgp DoubleRegs:$Nt, u16_3Imm:$addr)>; + +// Alias of: memXX($Rs+#XX) = $Rt to memXX($Rs) = $Rt +def : InstAlias<"memb($Rs) = $Rt", + (S2_storerb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt", + (S2_storerh_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt.h", + (S2_storerf_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memw($Rs) = $Rt", + (S2_storeri_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memb($Rs) = $Rt.new", + (S2_storerbnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt.new", + (S2_storerhnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : 
InstAlias<"memw($Rs) = $Rt.new", + (S2_storerinew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memb($Rs) = #$S8", + (S4_storeirb_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memh($Rs) = #$S8", + (S4_storeirh_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memw($Rs) = #$S8", + (S4_storeiri_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memd($Rs) = $Rtt", + (S2_storerd_io IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"memb($Rs) = setbit(#$U5)", + (L4_ior_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memh($Rs) = setbit(#$U5)", + (L4_ior_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memw($Rs) = setbit(#$U5)", + (L4_ior_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memb($Rs) = clrbit(#$U5)", + (L4_iand_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memh($Rs) = clrbit(#$U5)", + (L4_iand_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memw($Rs) = clrbit(#$U5)", + (L4_iand_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +// Alias of: $Rd = memXX($Rs+#XX) to $Rd = memXX($Rs) +def : InstAlias<"$Rd = memb($Rs)", + (L2_loadrb_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memub($Rs)", + (L2_loadrub_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memh($Rs)", + (L2_loadrh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memuh($Rs)", + (L2_loadruh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memw($Rs)", + (L2_loadri_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memd($Rs)", + (L2_loadrd_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memubh($Rs)", + (L2_loadbzw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memubh($Rs)", + (L2_loadbzw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = membh($Rs)", + (L2_loadbsw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = membh($Rs)", + (L2_loadbsw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memb_fifo($Rs)", + (L2_loadalignb_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memh_fifo($Rs)", + (L2_loadalignh_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +// Alias of: if ($Pt) $Rd = memXX($Rs + #$u6_X) +// to: if ($Pt) $Rd = memXX($Rs) +def : InstAlias<"if ($Pt) $Rd = memb($Rs)", + (L2_ploadrbt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memub($Rs)", + (L2_ploadrubt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memh($Rs)", + (L2_ploadrht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memuh($Rs)", + (L2_ploadruht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memw($Rs)", + (L2_ploadrit_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rdd = memd($Rs)", + (L2_ploadrdt_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +// Alias of: if ($Pt) memXX($Rs + #$u6_X) = $Rt +// to: if ($Pt) memXX($Rs) = $Rt +def : InstAlias<"if ($Pt) memb($Rs) = $Rt", + (S2_pstorerbt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt", + (S2_pstorerht_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt.h", + (S2_pstorerft_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = $Rt", + (S2_pstorerit_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if 
($Pt) memd($Rs) = $Rtt", + (S2_pstorerdt_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if ($Pt) memb($Rs) = $Rt.new", + (S2_pstorerbnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt.new", + (S2_pstorerhnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = $Rt.new", + (S2_pstorerinewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memb($Rs) = $Rt.new", + (S4_pstorerbnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memh($Rs) = $Rt.new", + (S4_pstorerhnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memw($Rs) = $Rt.new", + (S4_pstorerinewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + + +// Alias of: if (!$Pt) $Rd = memXX($Rs + #$u6_X) +// to: if (!$Pt) $Rd = memXX($Rs) +def : InstAlias<"if (!$Pt) $Rd = memb($Rs)", + (L2_ploadrbf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memub($Rs)", + (L2_ploadrubf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memh($Rs)", + (L2_ploadrhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memuh($Rs)", + (L2_ploadruhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memw($Rs)", + (L2_ploadrif_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rdd = memd($Rs)", + (L2_ploadrdf_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +// Alias of: if (!$Pt) memXX($Rs + #$u6_X) = $Rt +// to: if (!$Pt) memXX($Rs) = $Rt +def : InstAlias<"if (!$Pt) memb($Rs) = $Rt", + (S2_pstorerbf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt", + (S2_pstorerhf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.h", + (S2_pstorerff_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = $Rt", + (S2_pstorerif_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memd($Rs) = $Rtt", + (S2_pstorerdf_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if (!$Pt) memb($Rs) = $Rt.new", + (S2_pstorerbnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.new", + (S2_pstorerhnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = $Rt.new", + (S2_pstorerinewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memb($Rs) = $Rt.new", + (S4_pstorerbnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memh($Rs) = $Rt.new", + (S4_pstorerhnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memw($Rs) = $Rt.new", + (S4_pstorerinewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memb($Rs) = #$S6", + (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = #$S6", + (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = #$S6", + (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memb($Rs) = #$S6", + (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memh($Rs) = #$S6", 
+ (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memw($Rs) = #$S6", + (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memb($Rs) = #$S6", + (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = #$S6", + (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = #$S6", + (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memb($Rs) = #$S6", + (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memh($Rs) = #$S6", + (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memw($Rs) = #$S6", + (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +// Alias of: memXX($Rs + $u6_X) |= $Rt, also &=, +=, -= +// to: memXX($Rs) |= $Rt +def : InstAlias<"memb($Rs) &= $Rt", + (L4_and_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) |= $Rt", + (L4_or_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) += $Rt", + (L4_add_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) -= $Rt", + (L4_sub_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) += #$U5", + (L4_iadd_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) -= #$U5", + (L4_isub_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) &= $Rt", + (L4_and_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) |= $Rt", + (L4_or_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) += $Rt", + (L4_add_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) -= $Rt", + (L4_sub_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) += #$U5", + (L4_iadd_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) -= #$U5", + (L4_isub_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) &= $Rt", + (L4_and_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) |= $Rt", + (L4_or_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) += $Rt", + (L4_add_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) -= $Rt", + (L4_sub_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) += #$U5", + (L4_iadd_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) -= #$U5", + (L4_isub_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +// +// Alias of: if ($Pv.new) memX($Rs) = $Rt +// to: if (p3.new) memX(r17 + #0) = $Rt +def : InstAlias<"if ($Pv.new) memb($Rs) = $Rt", + (S4_pstorerbtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt", + (S4_pstorerhtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt.h", + (S4_pstorerftnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if 
($Pv.new) memw($Rs) = $Rt", + (S4_pstoreritnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memd($Rs) = $Rtt", + (S4_pstorerdtnew_io + PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if (!$Pv.new) memb($Rs) = $Rt", + (S4_pstorerbfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt", + (S4_pstorerhfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt.h", + (S4_pstorerffnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memw($Rs) = $Rt", + (S4_pstorerifnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memd($Rs) = $Rtt", + (S4_pstorerdfnew_io + PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +// +// Alias of: if ($Pt.new) $Rd = memub($Rs) -- And if (!$Pt.new) ... +// to: if ($Pt.new) $Rd = memub($Rs + #$u6_0) +def : InstAlias<"if ($Pt.new) $Rd = memub($Rs)", + (L2_ploadrubtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memb($Rs)", + (L2_ploadrbtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memh($Rs)", + (L2_ploadrhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memuh($Rs)", + (L2_ploadruhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memw($Rs)", + (L2_ploadritnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rdd = memd($Rs)", + (L2_ploadrdtnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memub($Rs)", + (L2_ploadrubfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memb($Rs)", + (L2_ploadrbfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memh($Rs)", + (L2_ploadrhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memuh($Rs)", + (L2_ploadruhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memw($Rs)", + (L2_ploadrifnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rdd = memd($Rs)", + (L2_ploadrdfnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"dcfetch($Rs)", + (Y2_dcfetchbo IntRegs:$Rs, 0), 0>; + +// Alias of some insn mappings, others must be handled by the parser +def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)", + (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; +def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)", + (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; + +// Rd=neg(Rs) is aliased to Rd=sub(#0,Rs) +def : InstAlias<"$Rd = neg($Rs)", + (A2_subri IntRegs:$Rd, 0, IntRegs:$Rs), 0>; + +def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>; +def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>; +def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>; +def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>; + +def : InstAlias<"$Pd = $Ps", + (C2_or PredRegs:$Pd, PredRegs:$Ps, PredRegs:$Ps), 0>; + +def : InstAlias<"$Rdd = vaddb($Rss, $Rtt)", + (A2_vaddub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 1>; + +def : InstAlias<"$Rdd = vsubb($Rss,$Rtt)", + (A2_vsubub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"$Rd = mpyui($Rs,$Rt)", + (M2_mpyi IntRegs:$Rd, IntRegs:$Rs, IntRegs:$Rt), 0>; + 
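// For illustration only, not from the patch: most aliases in this file
// just drop a zero offset (or swap operands of a commutable form); the
// parser accepts the short spelling and still emits the canonical
// instruction. A string-level standalone sketch of the zero-offset case;
// canonicalize is a hypothetical helper, nothing like the real parser:

#include <cstdio>
#include <string>

// "memw(r0) = r1" is shorthand for "memw(r0+#0) = r1".
static std::string canonicalize(const std::string &S) {
  std::string::size_type P = S.find(") =");
  if (P == std::string::npos || S.find("+#") != std::string::npos)
    return S; // not a base-register store, or already canonical
  std::string Out = S;
  Out.insert(P, "+#0");
  return Out;
}

int main() {
  std::printf("%s\n", canonicalize("memw(r0) = r1").c_str());
}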
+// Assembler mapped insns: cmp.lt(a,b) -> cmp.gt(b,a) +def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)", + (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; +def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)", + (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td new file mode 100644 index 000000000000..280832fd167f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td @@ -0,0 +1,1019 @@ +class Enc_COPROC_VX_3op_v<bits<15> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { opc{14-4}, src2}; + let Inst{13-0} = { opc{3}, src1, opc{2-0}, dst}; +} + +class V6_vtmpyb_enc : Enc_COPROC_VX_3op_v<0b000110010000000>; +class V6_vtmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000001>; +class V6_vdmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110010000010>; +class V6_vrmpyub_enc : Enc_COPROC_VX_3op_v<0b000110010000011>; +class V6_vrmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000100>; +class V6_vdsaduh_enc : Enc_COPROC_VX_3op_v<0b000110010000101>; +class V6_vdmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000110>; +class V6_vdmpybus_dv_enc : Enc_COPROC_VX_3op_v<0b000110010000111>; +class V6_vtmpyb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001000>; +class V6_vtmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001001>; +class V6_vtmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001010>; +class V6_vdmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001011>; +class V6_vrmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001100>; +class V6_vrmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001101>; +class V6_vdmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001110>; +class V6_vdmpybus_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001111>; +class V6_vdmpyhsusat_enc : Enc_COPROC_VX_3op_v<0b000110010010000>; +class V6_vdmpyhsuisat_enc : Enc_COPROC_VX_3op_v<0b000110010010001>; +class V6_vdmpyhsat_enc : Enc_COPROC_VX_3op_v<0b000110010010010>; +class V6_vdmpyhisat_enc : Enc_COPROC_VX_3op_v<0b000110010010011>; +class V6_vdmpyhb_dv_enc : Enc_COPROC_VX_3op_v<0b000110010010100>; +class V6_vmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010010101>; +class V6_vmpabus_enc : Enc_COPROC_VX_3op_v<0b000110010010110>; +class V6_vmpahb_enc : Enc_COPROC_VX_3op_v<0b000110010010111>; +class V6_vdmpyhsusat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011000>; +class V6_vdmpyhsuisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011001>; +class V6_vdmpyhisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011010>; +class V6_vdmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011011>; +class V6_vdmpyhb_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011100>; +class V6_vmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011101>; +class V6_vmpabus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011110>; +class V6_vmpahb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011111>; +class V6_vmpyh_enc : Enc_COPROC_VX_3op_v<0b000110010100000>; +class V6_vmpyhss_enc : Enc_COPROC_VX_3op_v<0b000110010100001>; +class V6_vmpyhsrs_enc : Enc_COPROC_VX_3op_v<0b000110010100010>; +class V6_vmpyuh_enc : Enc_COPROC_VX_3op_v<0b000110010100011>; +class V6_vmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101000>; +class V6_vmpyuh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101001>; +class V6_vmpyiwb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101010>; +class V6_vmpyiwh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101011>; +class V6_vmpyihb_enc : Enc_COPROC_VX_3op_v<0b000110010110000>; +class V6_vror_enc : Enc_COPROC_VX_3op_v<0b000110010110001>; +class V6_vasrw_enc : 
Enc_COPROC_VX_3op_v<0b000110010110101>; +class V6_vasrh_enc : Enc_COPROC_VX_3op_v<0b000110010110110>; +class V6_vaslw_enc : Enc_COPROC_VX_3op_v<0b000110010110111>; +class V6_vdsaduh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111000>; +class V6_vmpyihb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111001>; +class V6_vaslw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111010>; +class V6_vasrw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111101>; +class V6_vaslh_enc : Enc_COPROC_VX_3op_v<0b000110011000000>; +class V6_vlsrw_enc : Enc_COPROC_VX_3op_v<0b000110011000001>; +class V6_vlsrh_enc : Enc_COPROC_VX_3op_v<0b000110011000010>; +class V6_vmpyiwh_enc : Enc_COPROC_VX_3op_v<0b000110011000111>; +class V6_vmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110011001000>; +class V6_vmpyiwb_enc : Enc_COPROC_VX_3op_v<0b000110011010000>; +class V6_vtmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110011010100>; +class V6_vmpyub_enc : Enc_COPROC_VX_3op_v<0b000110011100000>; +class V6_vrmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000000>; +class V6_vrmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000001>; +class V6_vrmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000010>; +class V6_vdmpyhvsat_enc : Enc_COPROC_VX_3op_v<0b000111000000011>; +class V6_vmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000100>; +class V6_vmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000101>; +class V6_vmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000110>; +class V6_vmpyhv_enc : Enc_COPROC_VX_3op_v<0b000111000000111>; +class V6_vrmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001000>; +class V6_vrmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001001>; +class V6_vrmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001010>; +class V6_vdmpyhvsat_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001011>; +class V6_vmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001100>; +class V6_vmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001101>; +class V6_vmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001110>; +class V6_vmpyhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001111>; +class V6_vmpyuhv_enc : Enc_COPROC_VX_3op_v<0b000111000010000>; +class V6_vmpyhvsrs_enc : Enc_COPROC_VX_3op_v<0b000111000010001>; +class V6_vmpyhus_enc : Enc_COPROC_VX_3op_v<0b000111000010010>; +class V6_vmpabusv_enc : Enc_COPROC_VX_3op_v<0b000111000010011>; +class V6_vmpyih_enc : Enc_COPROC_VX_3op_v<0b000111000010100>; +class V6_vand_enc : Enc_COPROC_VX_3op_v<0b000111000010101>; +class V6_vor_enc : Enc_COPROC_VX_3op_v<0b000111000010110>; +class V6_vxor_enc : Enc_COPROC_VX_3op_v<0b000111000010111>; +class V6_vmpyuhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011000>; +class V6_vmpyhus_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011001>; +class V6_vmpyih_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011100>; +class V6_vmpyiewuh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011101>; +class V6_vmpyowh_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011110>; +class V6_vmpyowh_rnd_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011111>; +class V6_vaddw_enc : Enc_COPROC_VX_3op_v<0b000111000100000>; +class V6_vaddubsat_enc : Enc_COPROC_VX_3op_v<0b000111000100001>; +class V6_vadduhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100010>; +class V6_vaddhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100011>; +class V6_vaddwsat_enc : Enc_COPROC_VX_3op_v<0b000111000100100>; +class V6_vsubb_enc : Enc_COPROC_VX_3op_v<0b000111000100101>; +class V6_vsubh_enc : Enc_COPROC_VX_3op_v<0b000111000100110>; +class V6_vsubw_enc : Enc_COPROC_VX_3op_v<0b000111000100111>; +class V6_vmpyiewh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000101000>; +class V6_vsububsat_enc : 
Enc_COPROC_VX_3op_v<0b000111000110000>; +class V6_vsubuhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110001>; +class V6_vsubhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110010>; +class V6_vsubwsat_enc : Enc_COPROC_VX_3op_v<0b000111000110011>; +class V6_vaddb_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110100>; +class V6_vaddh_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110101>; +class V6_vaddw_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110110>; +class V6_vaddubsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110111>; +class V6_vadduhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000000>; +class V6_vaddhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000001>; +class V6_vaddwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000010>; +class V6_vsubb_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000011>; +class V6_vsubh_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000100>; +class V6_vsubw_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000101>; +class V6_vsububsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000110>; +class V6_vsubuhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000111>; +class V6_vsubhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010000>; +class V6_vsubwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010001>; +class V6_vaddubh_enc : Enc_COPROC_VX_3op_v<0b000111001010010>; +class V6_vadduhw_enc : Enc_COPROC_VX_3op_v<0b000111001010011>; +class V6_vaddhw_enc : Enc_COPROC_VX_3op_v<0b000111001010100>; +class V6_vsububh_enc : Enc_COPROC_VX_3op_v<0b000111001010101>; +class V6_vsubuhw_enc : Enc_COPROC_VX_3op_v<0b000111001010110>; +class V6_vsubhw_enc : Enc_COPROC_VX_3op_v<0b000111001010111>; +class V6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b000111001100000>; +class V6_vabsdiffh_enc : Enc_COPROC_VX_3op_v<0b000111001100001>; +class V6_vabsdiffuh_enc : Enc_COPROC_VX_3op_v<0b000111001100010>; +class V6_vabsdiffw_enc : Enc_COPROC_VX_3op_v<0b000111001100011>; +class V6_vavgub_enc : Enc_COPROC_VX_3op_v<0b000111001100100>; +class V6_vavguh_enc : Enc_COPROC_VX_3op_v<0b000111001100101>; +class V6_vavgh_enc : Enc_COPROC_VX_3op_v<0b000111001100110>; +class V6_vavgw_enc : Enc_COPROC_VX_3op_v<0b000111001100111>; +class V6_vnavgub_enc : Enc_COPROC_VX_3op_v<0b000111001110000>; +class V6_vnavgh_enc : Enc_COPROC_VX_3op_v<0b000111001110001>; +class V6_vnavgw_enc : Enc_COPROC_VX_3op_v<0b000111001110010>; +class V6_vavgubrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110011>; +class V6_vavguhrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110100>; +class V6_vavghrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110101>; +class V6_vavgwrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110110>; +class V6_vmpabuuv_enc : Enc_COPROC_VX_3op_v<0b000111001110111>; +class V6_vminub_enc : Enc_COPROC_VX_3op_v<0b000111110000001>; +class V6_vminuh_enc : Enc_COPROC_VX_3op_v<0b000111110000010>; +class V6_vminh_enc : Enc_COPROC_VX_3op_v<0b000111110000011>; +class V6_vminw_enc : Enc_COPROC_VX_3op_v<0b000111110000100>; +class V6_vmaxub_enc : Enc_COPROC_VX_3op_v<0b000111110000101>; +class V6_vmaxuh_enc : Enc_COPROC_VX_3op_v<0b000111110000110>; +class V6_vmaxh_enc : Enc_COPROC_VX_3op_v<0b000111110000111>; +class V6_vmaxw_enc : Enc_COPROC_VX_3op_v<0b000111110010000>; +class V6_vdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010001>; +class V6_vrdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010011>; +class V6_vdealb4w_enc : Enc_COPROC_VX_3op_v<0b000111110010111>; +class V6_vmpyowh_rnd_enc : Enc_COPROC_VX_3op_v<0b000111110100000>; +class V6_vshuffeb_enc : Enc_COPROC_VX_3op_v<0b000111110100001>; +class V6_vshuffob_enc : Enc_COPROC_VX_3op_v<0b000111110100010>; +class V6_vshufeh_enc : Enc_COPROC_VX_3op_v<0b000111110100011>; +class 
V6_vshufoh_enc : Enc_COPROC_VX_3op_v<0b000111110100100>; +class V6_vshufoeh_enc : Enc_COPROC_VX_3op_v<0b000111110100101>; +class V6_vshufoeb_enc : Enc_COPROC_VX_3op_v<0b000111110100110>; +class V6_vcombine_enc : Enc_COPROC_VX_3op_v<0b000111110100111>; +class V6_vmpyieoh_enc : Enc_COPROC_VX_3op_v<0b000111110110000>; +class V6_vsathub_enc : Enc_COPROC_VX_3op_v<0b000111110110010>; +class V6_vsatwh_enc : Enc_COPROC_VX_3op_v<0b000111110110011>; +class V6_vroundwh_enc : Enc_COPROC_VX_3op_v<0b000111110110100>; +class V6_vroundwuh_enc : Enc_COPROC_VX_3op_v<0b000111110110101>; +class V6_vroundhb_enc : Enc_COPROC_VX_3op_v<0b000111110110110>; +class V6_vroundhub_enc : Enc_COPROC_VX_3op_v<0b000111110110111>; +class V6_vasrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010000>; +class V6_vlsrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010001>; +class V6_vlsrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010010>; +class V6_vasrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010011>; +class V6_vaslwv_enc : Enc_COPROC_VX_3op_v<0b000111111010100>; +class V6_vaslhv_enc : Enc_COPROC_VX_3op_v<0b000111111010101>; +class V6_vaddb_enc : Enc_COPROC_VX_3op_v<0b000111111010110>; +class V6_vaddh_enc : Enc_COPROC_VX_3op_v<0b000111111010111>; +class V6_vmpyiewuh_enc : Enc_COPROC_VX_3op_v<0b000111111100000>; +class V6_vmpyiowh_enc : Enc_COPROC_VX_3op_v<0b000111111100001>; +class V6_vpackeb_enc : Enc_COPROC_VX_3op_v<0b000111111100010>; +class V6_vpackeh_enc : Enc_COPROC_VX_3op_v<0b000111111100011>; +class V6_vpackhub_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100101>; +class V6_vpackhb_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100110>; +class V6_vpackwuh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100111>; +class V6_vpackwh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111110000>; +class V6_vpackob_enc : Enc_COPROC_VX_3op_v<0b000111111110001>; +class V6_vpackoh_enc : Enc_COPROC_VX_3op_v<0b000111111110010>; +class V6_vmpyewuh_enc : Enc_COPROC_VX_3op_v<0b000111111110101>; +class V6_vmpyowh_enc : Enc_COPROC_VX_3op_v<0b000111111110111>; +class V6_extractw_enc : Enc_COPROC_VX_3op_v<0b100100100000001>; +class M6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b111010001010000>; +class M6_vabsdiffb_enc : Enc_COPROC_VX_3op_v<0b111010001110000>; + +class Enc_COPROC_VX_cmp<bits<13> opc> : OpcodeHexagon { + bits<2> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { 0b00011, opc{12-7}, src2{4-0} }; + let Inst{13-0} = { opc{6}, src1{4-0}, opc{5-0}, dst{1-0} }; +} + +class V6_vandvrt_acc_enc : Enc_COPROC_VX_cmp<0b0010111100000>; +class V6_vandvrt_enc : Enc_COPROC_VX_cmp<0b0011010010010>; +class V6_veqb_and_enc : Enc_COPROC_VX_cmp<0b1001001000000>; +class V6_veqh_and_enc : Enc_COPROC_VX_cmp<0b1001001000001>; +class V6_veqw_and_enc : Enc_COPROC_VX_cmp<0b1001001000010>; +class V6_vgtb_and_enc : Enc_COPROC_VX_cmp<0b1001001000100>; +class V6_vgth_and_enc : Enc_COPROC_VX_cmp<0b1001001000101>; +class V6_vgtw_and_enc : Enc_COPROC_VX_cmp<0b1001001000110>; +class V6_vgtub_and_enc : Enc_COPROC_VX_cmp<0b1001001001000>; +class V6_vgtuh_and_enc : Enc_COPROC_VX_cmp<0b1001001001001>; +class V6_vgtuw_and_enc : Enc_COPROC_VX_cmp<0b1001001001010>; +class V6_veqb_or_enc : Enc_COPROC_VX_cmp<0b1001001010000>; +class V6_veqh_or_enc : Enc_COPROC_VX_cmp<0b1001001010001>; +class V6_veqw_or_enc : Enc_COPROC_VX_cmp<0b1001001010010>; +class V6_vgtb_or_enc : Enc_COPROC_VX_cmp<0b1001001010100>; +class V6_vgth_or_enc : Enc_COPROC_VX_cmp<0b1001001010101>; +class V6_vgtw_or_enc : Enc_COPROC_VX_cmp<0b1001001010110>; +class V6_vgtub_or_enc : Enc_COPROC_VX_cmp<0b1001001011000>; +class V6_vgtuh_or_enc : 
Enc_COPROC_VX_cmp<0b1001001011001>; +class V6_vgtuw_or_enc : Enc_COPROC_VX_cmp<0b1001001011010>; +class V6_veqb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100000>; +class V6_veqh_xor_enc : Enc_COPROC_VX_cmp<0b1001001100001>; +class V6_veqw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100010>; +class V6_vgtb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100100>; +class V6_vgth_xor_enc : Enc_COPROC_VX_cmp<0b1001001100101>; +class V6_vgtw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100110>; +class V6_vgtub_xor_enc : Enc_COPROC_VX_cmp<0b1001001101000>; +class V6_vgtuh_xor_enc : Enc_COPROC_VX_cmp<0b1001001101001>; +class V6_vgtuw_xor_enc : Enc_COPROC_VX_cmp<0b1001001101010>; +class V6_veqb_enc : Enc_COPROC_VX_cmp<0b1111000000000>; +class V6_veqh_enc : Enc_COPROC_VX_cmp<0b1111000000001>; +class V6_veqw_enc : Enc_COPROC_VX_cmp<0b1111000000010>; +class V6_vgtb_enc : Enc_COPROC_VX_cmp<0b1111000000100>; +class V6_vgth_enc : Enc_COPROC_VX_cmp<0b1111000000101>; +class V6_vgtw_enc : Enc_COPROC_VX_cmp<0b1111000000110>; +class V6_vgtub_enc : Enc_COPROC_VX_cmp<0b1111000001000>; +class V6_vgtuh_enc : Enc_COPROC_VX_cmp<0b1111000001001>; +class V6_vgtuw_enc : Enc_COPROC_VX_cmp<0b1111000001010>; + +class Enc_COPROC_VX_p2op<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> dst; + bits<5> src2; + + let Inst{31-16} = { 0b00011110, src1{1-0}, 0b0000, opc{4-3} }; + let Inst{13-0} = { 1, src2{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vaddbq_enc : Enc_COPROC_VX_p2op<0b01000>; +class V6_vaddhq_enc : Enc_COPROC_VX_p2op<0b01001>; +class V6_vaddwq_enc : Enc_COPROC_VX_p2op<0b01010>; +class V6_vaddbnq_enc : Enc_COPROC_VX_p2op<0b01011>; +class V6_vaddhnq_enc : Enc_COPROC_VX_p2op<0b01100>; +class V6_vaddwnq_enc : Enc_COPROC_VX_p2op<0b01101>; +class V6_vsubbq_enc : Enc_COPROC_VX_p2op<0b01110>; +class V6_vsubhq_enc : Enc_COPROC_VX_p2op<0b01111>; +class V6_vsubwq_enc : Enc_COPROC_VX_p2op<0b10000>; +class V6_vsubbnq_enc : Enc_COPROC_VX_p2op<0b10001>; +class V6_vsubhnq_enc : Enc_COPROC_VX_p2op<0b10010>; +class V6_vsubwnq_enc : Enc_COPROC_VX_p2op<0b10011>; + +class Enc_COPROC_VX_2op<bits<6> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + + let Inst{31-16} = { 0b00011110000000, opc{5-4} }; + let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vabsh_enc : Enc_COPROC_VX_2op<0b000000>; +class V6_vabsh_sat_enc : Enc_COPROC_VX_2op<0b000001>; +class V6_vabsw_enc : Enc_COPROC_VX_2op<0b000010>; +class V6_vabsw_sat_enc : Enc_COPROC_VX_2op<0b000011>; +class V6_vnot_enc : Enc_COPROC_VX_2op<0b000100>; +class V6_vdealh_enc : Enc_COPROC_VX_2op<0b000110>; +class V6_vdealb_enc : Enc_COPROC_VX_2op<0b000111>; +class V6_vunpackob_enc : Enc_COPROC_VX_2op<0b001000>; +class V6_vunpackoh_enc : Enc_COPROC_VX_2op<0b001001>; +class V6_vunpackub_enc : Enc_COPROC_VX_2op<0b010000>; +class V6_vunpackuh_enc : Enc_COPROC_VX_2op<0b010001>; +class V6_vunpackb_enc : Enc_COPROC_VX_2op<0b010010>; +class V6_vunpackh_enc : Enc_COPROC_VX_2op<0b010011>; +class V6_vshuffh_enc : Enc_COPROC_VX_2op<0b010111>; +class V6_vshuffb_enc : Enc_COPROC_VX_2op<0b100000>; +class V6_vzb_enc : Enc_COPROC_VX_2op<0b100001>; +class V6_vzh_enc : Enc_COPROC_VX_2op<0b100010>; +class V6_vsb_enc : Enc_COPROC_VX_2op<0b100011>; +class V6_vsh_enc : Enc_COPROC_VX_2op<0b100100>; +class V6_vcl0w_enc : Enc_COPROC_VX_2op<0b100101>; +class V6_vpopcounth_enc : Enc_COPROC_VX_2op<0b100110>; +class V6_vcl0h_enc : Enc_COPROC_VX_2op<0b100111>; +class V6_vnormamtw_enc : Enc_COPROC_VX_2op<0b110100>; +class V6_vnormamth_enc : Enc_COPROC_VX_2op<0b110101>; +class V6_vassign_enc : 
Enc_COPROC_VX_2op<0b111111>; + +class Enc_COPROC_VMEM_vL32_b_ai<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + + let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0000>; +class V6_vL32b_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0001>; +class V6_vL32b_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0010>; +class V6_vL32Ub_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0111>; +class V6_vL32b_nt_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1000>; +class V6_vL32b_nt_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1001>; +class V6_vL32b_nt_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1010>; + +class Enc_COPROC_VMEM_vL32_b_ai_128B<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0000>; +class V6_vL32b_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0001>; +class V6_vL32b_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0010>; +class V6_vL32Ub_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0111>; +class V6_vL32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1000>; +class V6_vL32b_nt_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1001>; +class V6_vL32b_nt_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1010>; + +class Enc_COPROC_VMEM_vS32_b_ai_64B<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + bits<5> src3; + + let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class Enc_COPROC_VMEM_vS32_b_ai_128B<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + bits<5> src3; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0000>; +class V6_vS32Ub_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0111>; +class V6_vS32b_nt_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b1000>; + +class V6_vS32b_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0000>; +class V6_vS32Ub_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0111>; +class V6_vS32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b1000>; + +class Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + bits<3> src3; + + let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<0>; +class V6_vS32b_nt_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<1>; + +class Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + bits<3> src3; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<0>; 
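As a rough, standalone illustration (not part of the commit) of how these _ai (base + immediate) layouts assemble a 32-bit instruction word, the C++ sketch below packs the Enc_COPROC_VMEM_vL32_b_ai fields from above; the function and parameter names are invented, and the non-temporal 128B new-store variant continues right after the sketch.

#include <cstdint>

// Pack a vL32b_ai-style vector load per the Enc_COPROC_VMEM_vL32_b_ai layout
// above (64-byte mode): the 10-bit byte offset is vector-aligned, so only
// src2{9-6} survives into the encoding as src2_vector.
static uint32_t encodeVL32b_ai(unsigned opc4, unsigned dst5, unsigned base5,
                               int off10) {
  unsigned vecOff = (static_cast<unsigned>(off10) >> 6) & 0xF; // src2{9-6}
  uint32_t hi = (0b001010000u << 7)          // Inst{31-23}, fixed bits
              | (((opc4 >> 3) & 1u) << 6)    // Inst{22} = opc{3}; Inst{21} = 0
              | (base5 & 0x1Fu);             // Inst{20-16} = src1{4-0}
  uint32_t lo = (((vecOff >> 3) & 1u) << 13) // Inst{13} = src2_vector{3}
              | ((vecOff & 0x7u) << 8)       // Inst{10-8}; Inst{12-11} = 0b00
              | ((opc4 & 0x7u) << 5)         // Inst{7-5} = opc{2-0}
              | (dst5 & 0x1Fu);              // Inst{4-0} = dst{4-0}
  return (hi << 16) | lo;
}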
+class V6_vS32b_nt_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<1>; + +class Enc_COPROC_VMEM_vS32_b_pred_ai<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<4> src3_vector; + bits<5> src4; + + let src3_vector = src3{9-6}; + let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class Enc_COPROC_VMEM_vS32_b_pred_ai_128B<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<11> src3; + bits<4> src3_vector; + bits<5> src4; + + let src3_vector = src3{10-7}; + let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00000>; +class V6_vS32b_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00001>; +class V6_vS32b_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01000>; +class V6_vS32b_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01001>; +class V6_vS32Ub_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01110>; +class V6_vS32Ub_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01111>; +class V6_vS32b_nt_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10000>; +class V6_vS32b_nt_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10001>; +class V6_vS32b_nt_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11000>; +class V6_vS32b_nt_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11001>; + +class V6_vS32b_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00000>; +class V6_vS32b_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00001>; +class V6_vS32b_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01000>; +class V6_vS32b_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01001>; +class V6_vS32Ub_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01110>; +class V6_vS32Ub_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01111>; +class V6_vS32b_nt_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10000>; +class V6_vS32b_nt_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10001>; +class V6_vS32b_nt_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11000>; +class V6_vS32b_nt_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<4> src3_vector; + bits<3> src4; + + let src3_vector = src3{9-6}; + let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0000>; +class V6_vS32b_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0101>; +class V6_vS32b_nt_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1010>; +class V6_vS32b_nt_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1111>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<11> src3; + bits<4> src3_vector; + bits<3> src4; + + let src3_vector = src3{10-7}; + let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0000>; +class V6_vS32b_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0101>; 
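The ten opc selectors in the Enc_COPROC_VMEM_vS32_b_pred_ai list above are not arbitrary; they appear to decompose into independent flag bits. The sketch below is an inference from the class names only, not anything stated in the commit; the remaining new-value predicated 128B variants continue right after it.

// Inferred structure of the 5-bit opc used by the predicated-store classes
// above: bit 4 = non-temporal (_nt_), bit 3 = scalar predicate (pred/npred
// vs. vector qpred/nqpred), bits 2-1 = 0b11 for the unaligned vS32Ub forms,
// bit 0 = inverted predicate sense (n*).
static unsigned predStoreOpc(bool nt, bool scalarPred, bool unaligned,
                             bool negated) {
  return (nt ? 0x10u : 0u) | (scalarPred ? 0x08u : 0u) |
         (unaligned ? 0x06u : 0u) | (negated ? 0x01u : 0u);
}
// Sanity checks against the list: predStoreOpc(0,0,0,0) == 0b00000 (qpred),
// predStoreOpc(0,1,1,1) == 0b01111 (vS32Ub_npred),
// predStoreOpc(1,1,0,1) == 0b11001 (nt_npred).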
+class V6_vS32b_nt_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1010>; +class V6_vS32b_nt_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1111>; + +// TODO: Change script to generate dst, src1, src2 instead of +// dst, dst2, src1. +class Enc_COPROC_VMEM_vL32_b_pi<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0000>; +class V6_vL32b_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0001>; +class V6_vL32b_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0010>; +class V6_vL32Ub_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0111>; +class V6_vL32b_nt_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1000>; +class V6_vL32b_nt_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1001>; +class V6_vL32b_nt_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1010>; + +class Enc_COPROC_VMEM_vL32_b_pi_128B<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0000>; +class V6_vL32b_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0001>; +class V6_vL32b_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0010>; +class V6_vL32Ub_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0111>; +class V6_vL32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1000>; +class V6_vL32b_nt_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1001>; +class V6_vL32b_nt_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1010>; + + +// TODO: Change script to generate src1, src2 and src3 instead of +// dst, src1, src2. +class Enc_COPROC_VMEM_vS32_b_pi<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + bits<5> src3; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} }; + let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0000>; +class V6_vS32Ub_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0111>; +class V6_vS32b_nt_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b1000>; + +class Enc_COPROC_VMEM_vS32_b_pi_128B<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + bits<5> src3; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} }; + let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0000>; +class V6_vS32Ub_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0111>; +class V6_vS32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b1000>; + +// TODO: Change script to generate src1, src2 and src3 instead of +// dst, src1, src2. 
+class Enc_COPROC_VMEM_vS32b_n_ew_pi<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + bits<3> src3; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<0>; +class V6_vS32b_nt_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<1>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + bits<3> src3; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<0>; +class V6_vS32b_nt_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<1>; + +// TODO: Change script to generate src1, src2,src3 and src4 instead of +// dst, src1, src2, src3. +class Enc_COPROC_VMEM_vS32_b_pred_pi<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src3_vector; + bits<5> src4; + + let src3_vector = src3{8-6}; + let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00000>; +class V6_vS32b_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00001>; +class V6_vS32b_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01000>; +class V6_vS32b_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01001>; +class V6_vS32Ub_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01110>; +class V6_vS32Ub_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01111>; +class V6_vS32b_nt_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10000>; +class V6_vS32b_nt_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10001>; +class V6_vS32b_nt_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11000>; +class V6_vS32b_nt_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11001>; + +// TODO: Change script to generate src1, src2,src3 and src4 instead of +// dst, src1, src2, src3. 
+class Enc_COPROC_VMEM_vS32_b_pred_pi_128B<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<3> src3_vector; + bits<5> src4; + + let src3_vector = src3{9-7}; + let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00000>; +class V6_vS32b_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00001>; +class V6_vS32b_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01000>; +class V6_vS32b_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01001>; +class V6_vS32Ub_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01110>; +class V6_vS32Ub_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01111>; +class V6_vS32b_nt_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10000>; +class V6_vS32b_nt_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10001>; +class V6_vS32b_nt_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11000>; +class V6_vS32b_nt_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src3_vector; + bits<3> src4; + + let src3_vector = src3{8-6}; + let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0000>; +class V6_vS32b_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0101>; +class V6_vS32b_nt_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1010>; +class V6_vS32b_nt_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1111>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<3> src3_vector; + bits<3> src4; + + let src3_vector = src3{9-7}; + let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0000>; +class V6_vS32b_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0101>; +class V6_vS32b_nt_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1010>; +class V6_vS32b_nt_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1111>; + +class Enc_LD_load_m<bits<13> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<1> src2; + + let Inst{31-16} = { opc{12}, 0, opc{11-10}, 1, opc{9-4}, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b000, opc{3}, 0, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ppu_enc : Enc_LD_load_m<0b0100110000000>; +class V6_vL32b_cur_ppu_enc : Enc_LD_load_m<0b0100110000001>; +class V6_vL32b_tmp_ppu_enc : Enc_LD_load_m<0b0100110000010>; +class V6_vL32Ub_ppu_enc : Enc_LD_load_m<0b0100110000111>; +class V6_vL32b_nt_ppu_enc : Enc_LD_load_m<0b0100110100000>; +class V6_vL32b_nt_cur_ppu_enc : Enc_LD_load_m<0b0100110100001>; +class V6_vL32b_nt_tmp_ppu_enc : Enc_LD_load_m<0b0100110100010>; + +class Enc_COPROC_VMEM_vS32_b_ppu<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<1> src2; + bits<5> src3; + + let Inst{31-16} = { 0b001010110, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b00000, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0000>; +class V6_vS32Ub_ppu_enc : 
Enc_COPROC_VMEM_vS32_b_ppu<0b0111>; +class V6_vS32b_nt_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b1000>; + +class Enc_COPROC_VMEM_vS32b_new_ppu<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<1> src2; + bits<3> src3; + + let Inst{31-16} = { 0b001010110, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b0000000100, src3{2-0} }; +} + +class V6_vS32b_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<0>; +class V6_vS32b_nt_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<1>; + +class Enc_COPROC_VMEM_vS32_b_pred_ppu<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<1> src3; + bits<5> src4; + + let Inst{31-16} = { 0b001010111, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3{0}, src1{1-0}, 0b000, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00000>; +class V6_vS32b_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00001>; +class V6_vS32b_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01000>; +class V6_vS32b_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01001>; +class V6_vS32Ub_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01110>; +class V6_vS32Ub_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01111>; +class V6_vS32b_nt_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10000>; +class V6_vS32b_nt_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10001>; +class V6_vS32b_nt_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11000>; +class V6_vS32b_nt_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<1> src3; + bits<3> src4; + + let Inst{31-16} = { 0b001010111, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3{0}, src1{1-0}, 0b00001, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0000>; +class V6_vS32b_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0101>; +class V6_vS32b_nt_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1010>; +class V6_vS32b_nt_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1111>; + + +class Enc_COPROC_VX_4op_i<bits<5> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<1> src3; + + let Inst{31-16} = { 0b00011001, opc{4-2}, src2{4-0} }; + let Inst{13-0} = { opc{1}, src1{4-0}, 1, opc{0}, src3{0}, dst{4-0} }; +} + +class V6_vrmpybusi_enc : Enc_COPROC_VX_4op_i<0b01000>; +class V6_vrsadubi_enc : Enc_COPROC_VX_4op_i<0b01001>; +class V6_vrmpybusi_acc_enc : Enc_COPROC_VX_4op_i<0b01010>; +class V6_vrsadubi_acc_enc : Enc_COPROC_VX_4op_i<0b01011>; +class V6_vrmpyubi_acc_enc : Enc_COPROC_VX_4op_i<0b01111>; +class V6_vrmpyubi_enc : Enc_COPROC_VX_4op_i<0b10101>; + +class Enc_COPROC_VX_vandqrt<bits<5> opc> : OpcodeHexagon { + bits<5> dst; + bits<2> src1; + bits<5> src2; + + let Inst{31-16} = { 0b00011001, opc{4-3}, 1, src2{4-0} }; + let Inst{13-0} = { opc{2}, 0b000, src1{1-0}, opc{1-0}, 1, dst{4-0} }; +} + +class V6_vandqrt_acc_enc : Enc_COPROC_VX_vandqrt<0b01101>; +class V6_vandqrt_enc : Enc_COPROC_VX_vandqrt<0b10010>; + +class Enc_COPROC_VX_cards<bits<2> opc> : OpcodeHexagon { + bits<5> src1; + bits<5> src2; + bits<5> src3; + + let Inst{31-16} = { 0b00011001111, src3{4-0} }; + let Inst{13-0} = { 1, src1{4-0}, 0, opc{1-0}, src2{4-0} }; +} + +class V6_vshuff_enc : Enc_COPROC_VX_cards<0b01>; +class V6_vdeal_enc : Enc_COPROC_VX_cards<0b10>; + + +class Enc_COPROC_VX_v_cmov<bits<1> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> dst; + bits<5> src2; + + let Inst{31-16} = { 0b0001101000, opc{0}, 
0b00000 }; + let Inst{13-0} = { 0, src2{4-0}, 0, src1{1-0}, dst{4-0} }; +} + +class V6_vcmov_enc : Enc_COPROC_VX_v_cmov<0>; +class V6_vncmov_enc : Enc_COPROC_VX_v_cmov<1>; + +class Enc_X_p3op<bits<8> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> dst; + bits<5> src2; + bits<5> src3; + + let Inst{31-16} = { opc{7-5}, 0b1101, opc{4}, 0, opc{3-2}, src3{4-0} }; + let Inst{13-0} = { opc{1}, src2{4-0}, opc{0}, src1{1-0}, dst{4-0} }; +} + +class V6_vnccombine_enc : Enc_X_p3op<0b00001000>; +class V6_vccombine_enc : Enc_X_p3op<0b00001100>; + +class Enc_COPROC_VX_4op_r<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<3> src3; + + let Inst{31-16} = { 0b00011011, src2{4-0}, src3{2-0} }; + let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_valignb_enc : Enc_COPROC_VX_4op_r<0b0000>; +class V6_vlalignb_enc : Enc_COPROC_VX_4op_r<0b0001>; +class V6_vasrwh_enc : Enc_COPROC_VX_4op_r<0b0010>; +class V6_vasrwhsat_enc : Enc_COPROC_VX_4op_r<0b0011>; +class V6_vasrwhrndsat_enc : Enc_COPROC_VX_4op_r<0b0100>; +class V6_vasrwuhsat_enc : Enc_COPROC_VX_4op_r<0b0101>; +class V6_vasrhubsat_enc : Enc_COPROC_VX_4op_r<0b0110>; +class V6_vasrhubrndsat_enc : Enc_COPROC_VX_4op_r<0b0111>; +class V6_vasrhbrndsat_enc : Enc_COPROC_VX_4op_r<0b1000>; +class V6_vlutvvb_enc : Enc_COPROC_VX_4op_r<0b1001>; +class V6_vshuffvdd_enc : Enc_COPROC_VX_4op_r<0b1011>; +class V6_vdealvdd_enc : Enc_COPROC_VX_4op_r<0b1100>; +class V6_vlutvvb_oracc_enc : Enc_COPROC_VX_4op_r<0b1101>; +class V6_vlutvwh_enc : Enc_COPROC_VX_4op_r<0b1110>; +class V6_vlutvwh_oracc_enc : Enc_COPROC_VX_4op_r<0b1111>; + +class Enc_S_3op_valign_i<bits<9> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<3> src3; + + let Inst{31-16} = { opc{8-7}, 0, opc{6-3}, 0b00, opc{2-1}, src2{4-0} }; + let Inst{13-0} = { opc{0}, src1{4-0}, src3{2-0}, dst{4-0} }; +} + +class V6_vlutb_enc : Enc_S_3op_valign_i<0b001100000>; +class V6_vlutb_dv_enc : Enc_S_3op_valign_i<0b001100010>; +class V6_vlutb_acc_enc : Enc_S_3op_valign_i<0b001100100>; +class V6_vlutb_dv_acc_enc : Enc_S_3op_valign_i<0b001100110>; +class V6_valignbi_enc : Enc_S_3op_valign_i<0b001111011>; +class V6_vlalignbi_enc : Enc_S_3op_valign_i<0b001111111>; +class S2_valignib_enc : Enc_S_3op_valign_i<0b110000000>; +class S2_addasl_rrri_enc : Enc_S_3op_valign_i<0b110010000>; + +class Enc_COPROC_VX_3op_q<bits<3> opc> : OpcodeHexagon { + bits<2> dst; + bits<2> src1; + bits<2> src2; + + let Inst{31-16} = { 0b00011110, src2{1-0}, 0b000011 }; + let Inst{13-0} = { 0b0000, src1{1-0}, 0b000, opc{2-0}, dst{1-0} }; +} + +class V6_pred_and_enc : Enc_COPROC_VX_3op_q<0b000>; +class V6_pred_or_enc : Enc_COPROC_VX_3op_q<0b001>; +class V6_pred_xor_enc : Enc_COPROC_VX_3op_q<0b011>; +class V6_pred_or_n_enc : Enc_COPROC_VX_3op_q<0b100>; +class V6_pred_and_n_enc : Enc_COPROC_VX_3op_q<0b101>; + +class V6_pred_not_enc : OpcodeHexagon { + bits<2> dst; + bits<2> src1; + + let Inst{31-16} = { 0b0001111000000011 }; + let Inst{13-0} = { 0b0000, src1{1-0}, 0b000010, dst{1-0} }; +} + +class Enc_COPROC_VX_4op_q<bits<1> opc> : OpcodeHexagon { + bits<5> dst; + bits<2> src1; + bits<5> src2; + bits<5> src3; + + let Inst{31-16} = { 0b000111101, opc{0}, 1, src3{4-0} }; + let Inst{13-0} = { 1, src2{4-0}, 0, src1{1-0}, dst{4-0} }; +} + +class V6_vswap_enc : Enc_COPROC_VX_4op_q<0>; +class V6_vmux_enc : Enc_COPROC_VX_4op_q<1>; + +class Enc_X_2op<bits<16> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + + let Inst{31-16} = { opc{15-5}, src1{4-0} }; + let Inst{13-0} = { opc{4-3}, 
0b0000, opc{2-0}, dst{4-0} }; +} + +class V6_lvsplatw_enc : Enc_X_2op<0b0001100110100001>; +class V6_vinsertwr_enc : Enc_X_2op<0b0001100110110001>; +class S6_vsplatrbp_enc : Enc_X_2op<0b1000010001000100>; + + +class Enc_CR_2op_r<bits<12> opc> : OpcodeHexagon { + bits<2> dst; + bits<5> src1; + + let Inst{31-16} = { opc{11}, 0, opc{10-7}, 0, opc{6-3}, src1{4-0} }; + let Inst{13-0} = { opc{2}, 0b000000, opc{1}, 0b000, opc{0}, dst{1-0} }; +} + +class V6_pred_scalar2_enc : Enc_CR_2op_r<0b001101101011>; +class Y5_l2locka_enc : Enc_CR_2op_r<0b110000111100>; + +class Enc_S_3op_i6<bits<9> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<6> src2; + + let Inst{31-16} = { 0b1000, opc{8-6}, 0, opc{5-3}, src1{4-0} }; + let Inst{13-0} = { src2{5-0}, opc{2-0}, dst{4-0} }; +} + +class S6_rol_i_p_enc : Enc_S_3op_i6<0b000000011>; +class S6_rol_i_p_nac_enc : Enc_S_3op_i6<0b001000011>; +class S6_rol_i_p_acc_enc : Enc_S_3op_i6<0b001000111>; +class S6_rol_i_p_and_enc : Enc_S_3op_i6<0b001010011>; +class S6_rol_i_p_or_enc : Enc_S_3op_i6<0b001010111>; +class S6_rol_i_p_xacc_enc : Enc_S_3op_i6<0b001100011>; + +class Enc_X_3op_r<bits<15> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { opc{14-4}, src1{4-0} }; + let Inst{13-0} = { opc{3}, src2{4-0}, opc{2-0}, dst{4-0} }; +} + +class S6_rol_i_r_enc : Enc_X_3op_r<0b100011000000011>; +class S6_rol_i_r_nac_enc : Enc_X_3op_r<0b100011100000011>; +class S6_rol_i_r_acc_enc : Enc_X_3op_r<0b100011100000111>; +class S6_rol_i_r_and_enc : Enc_X_3op_r<0b100011100100011>; +class S6_rol_i_r_or_enc : Enc_X_3op_r<0b100011100100111>; +class S6_rol_i_r_xacc_enc : Enc_X_3op_r<0b100011101000011>; +class S6_vtrunehb_ppp_enc : Enc_X_3op_r<0b110000011000011>; +class S6_vtrunohb_ppp_enc : Enc_X_3op_r<0b110000011000101>; + +class Enc_no_operands<bits<25> opc> : OpcodeHexagon { + + let Inst{31-16} = { opc{24-10}, 0 }; + let Inst{13-0} = { opc{9-7}, 0b000, opc{6-0}, 0 }; +} + +class Y5_l2gunlock_enc : Enc_no_operands<0b1010100000100000010000000>; +class Y5_l2gclean_enc : Enc_no_operands<0b1010100000100000100000000>; +class Y5_l2gcleaninv_enc : Enc_no_operands<0b1010100000100000110000000>; +class V6_vhist_enc : Enc_no_operands<0b0001111000000001001000000>; + +class Enc_J_jumpr<bits<13> opc> : OpcodeHexagon { + bits<5> src1; + + let Inst{31-16} = { opc{12-6}, 0, opc{5-3}, src1{4-0} }; + let Inst{13-0} = { 0b00, opc{2}, 0b0000, opc{1-0}, 0b00000 }; +} + +class Y5_l2unlocka_enc : Enc_J_jumpr<0b1010011011000>; +class Y2_l2cleaninvidx_enc : Enc_J_jumpr<0b1010100011000>; + +class Enc_ST_l2gclean_pa<bits<2> opc> : OpcodeHexagon { + bits<5> src1; + + let Inst{31-16} = { 0b101001101, opc{1-0}, 0b00000 }; + let Inst{13-0} = { 0, src1{4-0}, 0b00000000 }; +} + +class Y6_l2gcleanpa_enc : Enc_ST_l2gclean_pa<0b01>; +class Y6_l2gcleaninvpa_enc : Enc_ST_l2gclean_pa<0b10>; + +class A5_ACS_enc : OpcodeHexagon { + bits<5> dst1; + bits<2> dst2; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { 0b11101010101, src1{4-0} }; + let Inst{13-0} = { 0, src2{4-0}, 0, dst2{1-0}, dst1{4-0} }; +} + +class Enc_X_4op_r<bits<8> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<2> src3; + + let Inst{31-16} = { 0b11, opc{7}, 0, opc{6-5}, 1, opc{4-1}, src1{4-0} }; + let Inst{13-0} = { 0, src2{4-0}, opc{0}, src3{1-0}, dst{4-0} }; +} + +class S2_vsplicerb_enc : Enc_X_4op_r<0b00001000>; +class S2_cabacencbin_enc : Enc_X_4op_r<0b00001010>; +class F2_sffma_sc_enc : Enc_X_4op_r<0b11110111>; + +class V6_vhistq_enc : OpcodeHexagon { + bits<2> src1; + + 
let Inst{31-16} = { 0b00011110, src1{1-0}, 0b000010 }; + let Inst{13-0} = { 0b10000010000000 }; +} + +// TODO: Change script to generate dst1 instead of dst. +class A6_vminub_RdP_enc : OpcodeHexagon { + bits<5> dst1; + bits<2> dst2; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { 0b11101010111, src2{4-0} }; + let Inst{13-0} = { 0, src1{4-0}, 0, dst2{1-0}, dst1{4-0} }; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td index 44bab292f32c..3c5ec1701dc2 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td @@ -34,6 +34,8 @@ class SubTarget<bits<6> value> { def HasAnySubT : SubTarget<0x3f>; // 111111 def HasV5SubT : SubTarget<0x3e>; // 111110 +def HasV55SubT : SubTarget<0x3c>; // 111100 +def HasV60SubT : SubTarget<0x38>; // 111000 // Addressing modes for load/store instructions class AddrModeType<bits<3> value> { @@ -57,6 +59,8 @@ def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb). def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh). def WordAccess : MemAccessSize<3>;// Word access instruction (memw). def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd) +def Vector64Access : MemAccessSize<7>;// Vector access instruction (memv) +def Vector128Access : MemAccessSize<8>;// Vector access instruction (memv) //===----------------------------------------------------------------------===// @@ -167,14 +171,23 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, bits<1> isFP = 0; let TSFlags {48} = isFP; // Floating-point. + bits<1> hasNewValue2 = 0; + let TSFlags{50} = hasNewValue2; // Second New-value producer insn. + bits<3> opNewValue2 = 0; + let TSFlags{53-51} = opNewValue2; // Second New-value produced operand. + + bits<1> isAccumulator = 0; + let TSFlags{54} = isAccumulator; + // Fields used for relation models. + bit isNonTemporal = 0; + string isNT = ""; // set to "true" for non-temporal vector stores. string BaseOpcode = ""; string CextOpcode = ""; string PredSense = ""; string PNewValue = ""; string NValueST = ""; // Set to "true" for new-value stores. string InputType = ""; // Input is "imm" or "reg" type. - string isMEMri = "false"; // Set to "true" for load/store with MEMri operand. string isFloat = "false"; // Set to "true" for the floating-point load/store. string isBrTaken = !if(isTaken, "true", "false"); // Set to "true"/"false" for jump instructions @@ -182,6 +195,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, ""); let PNewValue = !if(isPredicatedNew, "new", ""); let NValueST = !if(isNVStore, "true", "false"); + let isNT = !if(isNonTemporal, "true", "false"); // *** Must match MCTargetDesc/HexagonBaseInfo.h *** } @@ -217,6 +231,11 @@ class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0> : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon; +let mayLoad = 1 in +class LD1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>; + // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. // Definition of the instruction class CHANGED from V2/V3 to V4. 
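The TSFlags hunk above only declares where the new fields live; per the "Must match MCTargetDesc/HexagonBaseInfo.h" note, the decode side reads them back with shifts and masks. A minimal self-contained sketch, with placeholder accessor names and bit positions copied from the let TSFlags{...} lines:

#include <cstdint>

// Decode the TSFlags fields added in the hunk above (48 = isFP,
// 50 = hasNewValue2, 53-51 = opNewValue2, 54 = isAccumulator). The accessor
// names are placeholders, not necessarily those used in HexagonBaseInfo.h.
static bool isFP(uint64_t TSFlags)            { return (TSFlags >> 48) & 1; }
static bool hasNewValue2(uint64_t TSFlags)    { return (TSFlags >> 50) & 1; }
static unsigned opNewValue2(uint64_t TSFlags) { return (TSFlags >> 51) & 0x7; }
static bool isAccumulator(uint64_t TSFlags)   { return (TSFlags >> 54) & 1; }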
@@ -234,6 +253,12 @@ class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0> : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon; +// Same as ST0Inst but doesn't derive from OpcodeHexagon. +let mayStore = 1 in +class ST1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>; + // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. // Definition of the instruction class CHANGED from V2/V3 to V4. @@ -277,6 +302,11 @@ class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, OpcodeHexagon; +// Same as above but doesn't derive from OpcodeHexagon +class MInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + // M Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. @@ -294,6 +324,10 @@ class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, OpcodeHexagon; +class SInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + // S Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. @@ -402,3 +436,13 @@ include "HexagonInstrFormatsV4.td" //===----------------------------------------------------------------------===// // V4 Instruction Format Definitions + //===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V60 Instruction Format Definitions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrFormatsV60.td" + +//===----------------------------------------------------------------------===// +// V60 Instruction Format Definitions + +//===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td index db83ef6bc474..2d1dea526eed 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -21,8 +21,6 @@ def TypeMEMOP : IType<9>; def TypeNV : IType<10>; def TypeDUPLEX : IType<11>; def TypeCOMPOUND : IType<12>; -def TypeAG_VX : IType<28>; -def TypeAG_VM : IType<29>; def TypePREFIX : IType<30>; // Duplex Instruction Class Declaration diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td new file mode 100644 index 000000000000..f3d43dec733e --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td @@ -0,0 +1,238 @@ +//==- HexagonInstrFormatsV60.td - Hexagon Instruction Formats -*- tablegen -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V60 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+// Hexagon Instruction Flags +
+//
+// *** Must match BaseInfo.h ***
+//----------------------------------------------------------------------------//
+
+def TypeCVI_VA : IType<13>;
+def TypeCVI_VA_DV : IType<14>;
+def TypeCVI_VX : IType<15>;
+def TypeCVI_VX_DV : IType<16>;
+def TypeCVI_VP : IType<17>;
+def TypeCVI_VP_VS : IType<18>;
+def TypeCVI_VS : IType<19>;
+def TypeCVI_VINLANESAT : IType<20>;
+def TypeCVI_VM_LD : IType<21>;
+def TypeCVI_VM_TMP_LD : IType<22>;
+def TypeCVI_VM_CUR_LD : IType<23>;
+def TypeCVI_VM_VP_LDU : IType<24>;
+def TypeCVI_VM_ST : IType<25>;
+def TypeCVI_VM_NEW_ST : IType<26>;
+def TypeCVI_VM_STU : IType<27>;
+def TypeCVI_HIST : IType<28>;
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions +
+//----------------------------------------------------------------------------//
+
+let validSubTargets = HasV60SubT in
+{
+class CVI_VA_Resource<dag outs, dag ins, string asmstr,
+                      list<dag> pattern = [], string cstr = "",
+                      InstrItinClass itin = CVI_VA>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VA_DV_Resource<dag outs, dag ins, string asmstr,
+                         list<dag> pattern = [], string cstr = "",
+                         InstrItinClass itin = CVI_VA_DV>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA_DV>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource_long<dag outs, dag ins, string asmstr,
+                           list<dag> pattern = [], string cstr = "",
+                           InstrItinClass itin = CVI_VX_LONG>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource_late<dag outs, dag ins, string asmstr,
+                           list<dag> pattern = [], string cstr = "",
+                           InstrItinClass itin = CVI_VX_LATE>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+    Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource<dag outs, dag ins, string asmstr,
+                      list<dag> pattern = [], string cstr = "",
+                      InstrItinClass itin = CVI_VX>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource<dag outs, dag ins, string asmstr,
+                         list<dag> pattern = [], string cstr = "",
+                         InstrItinClass itin = CVI_VX_DV>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Slot2_Resource<dag outs, dag ins, string asmstr,
+                               list<dag> pattern = [], string cstr = "",
+                               InstrItinClass itin = CVI_VX_DV_SLOT2>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource_long<dag outs, dag ins, string asmstr,
+                              list<dag> pattern = [], string cstr = "",
+                              InstrItinClass itin = CVI_VX_DV_LONG>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_Resource_long<dag outs, dag ins, string asmstr,
+                           list<dag> pattern = [], string cstr = "",
+                           InstrItinClass itin = CVI_VP_LONG>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP>,
+    OpcodeHexagon,
Requires<[HasV60T, UseHVX]>; + +class CVI_VP_VS_Resource_early<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VP_VS_EARLY> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VP_VS_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VP_VS_LONG> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VP_VS_Resource_long_early<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VP_VS_LONG_EARLY> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VS_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VS> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VINLANESAT_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VINLANESAT> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VINLANESAT>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VS_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VS> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_LD_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_LD_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_TMP_LD_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_TMP_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_TMP_LD_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_TMP_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_CUR_LD_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_CUR_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_CUR_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_VP_LDU_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_VP_LDU> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_VP_LDU_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_VP_LDU> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_ST_Resource<dag outs, 
dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_ST_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_NEW_ST_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_NEW_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_NEW_ST_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_NEW_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_STU_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_STU> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_STU_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_STU> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_HIST_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_HIST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; +} + +let validSubTargets = HasV60SubT in +{ +class CVI_VA_Resource1<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VA> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>, + Requires<[HasV60T, UseHVX]>; + +class CVI_VX_DV_Resource1<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_DV> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>, + Requires<[HasV60T, UseHVX]>; + +class CVI_HIST_Resource1<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_HIST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>, + Requires<[HasV60T, UseHVX]>; +} + + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 3cb082349b41..eb3590cb1076 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -23,9 +23,11 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <cctype> using namespace llvm; @@ -36,9 +38,41 @@ using namespace llvm; #include "HexagonGenInstrInfo.inc" #include "HexagonGenDFAPacketizer.inc" +using namespace llvm; + +cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden, + cl::init(false), cl::desc("Do not consider inline-asm a scheduling/" + "packetization boundary.")); + +static cl::opt<bool> 
EnableBranchPrediction("hexagon-enable-branch-prediction", + cl::Hidden, cl::init(true), cl::desc("Enable branch prediction")); + +static cl::opt<bool> DisableNVSchedule("disable-hexagon-nv-schedule", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable schedule adjustment for new value stores.")); + +static cl::opt<bool> EnableTimingClassLatency( + "enable-timing-class-latency", cl::Hidden, cl::init(false), + cl::desc("Enable timing class latency")); + +static cl::opt<bool> EnableALUForwarding( + "enable-alu-forwarding", cl::Hidden, cl::init(true), + cl::desc("Enable vec alu forwarding")); + +static cl::opt<bool> EnableACCForwarding( + "enable-acc-forwarding", cl::Hidden, cl::init(true), + cl::desc("Enable vec acc forwarding")); + +static cl::opt<bool> BranchRelaxAsmLarge("branch-relax-asm-large", + cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("branch relax asm")); + /// /// Constants for Hexagon instructions. /// +const int Hexagon_MEMV_OFFSET_MAX_128B = 2047; // #s7 +const int Hexagon_MEMV_OFFSET_MIN_128B = -2048; // #s7 +const int Hexagon_MEMV_OFFSET_MAX = 1023; // #s6 +const int Hexagon_MEMV_OFFSET_MIN = -1024; // #s6 const int Hexagon_MEMW_OFFSET_MAX = 4095; const int Hexagon_MEMW_OFFSET_MIN = -4096; const int Hexagon_MEMD_OFFSET_MAX = 8191; @@ -57,71 +91,49 @@ const int Hexagon_MEMH_AUTOINC_MAX = 14; const int Hexagon_MEMH_AUTOINC_MIN = -16; const int Hexagon_MEMB_AUTOINC_MAX = 7; const int Hexagon_MEMB_AUTOINC_MIN = -8; +const int Hexagon_MEMV_AUTOINC_MAX = 192; +const int Hexagon_MEMV_AUTOINC_MIN = -256; +const int Hexagon_MEMV_AUTOINC_MAX_128B = 384; +const int Hexagon_MEMV_AUTOINC_MIN_128B = -512; // Pin the vtable to this file. void HexagonInstrInfo::anchor() {} HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), - RI(), Subtarget(ST) {} + RI() {} -/// isLoadFromStackSlot - If the specified machine instruction is a direct -/// load from a stack slot, return the virtual or physical register number of -/// the destination along with the FrameIndex of the loaded stack slot. If -/// not, return 0. This predicate must return 0 if the instruction has -/// any side effects other than loading from the stack slot. -unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { +static bool isIntRegForSubInst(unsigned Reg) { + return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || + (Reg >= Hexagon::R16 && Reg <= Hexagon::R23); +} - switch (MI->getOpcode()) { - default: break; - case Hexagon::L2_loadri_io: - case Hexagon::L2_loadrd_io: - case Hexagon::L2_loadrh_io: - case Hexagon::L2_loadrb_io: - case Hexagon::L2_loadrub_io: - if (MI->getOperand(2).isFI() && - MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { - FrameIndex = MI->getOperand(2).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - return 0; + +static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) { + return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_loreg)) && + isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_hireg)); } -/// isStoreToStackSlot - If the specified machine instruction is a direct -/// store to a stack slot, return the virtual or physical register number of -/// the source reg along with the FrameIndex of the loaded stack slot. If -/// not, return 0. This predicate must return 0 if the instruction has -/// any side effects other than storing to the stack slot. 
-unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case Hexagon::S2_storeri_io: - case Hexagon::S2_storerd_io: - case Hexagon::S2_storerh_io: - case Hexagon::S2_storerb_io: - if (MI->getOperand(2).isFI() && - MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { - FrameIndex = MI->getOperand(0).getIndex(); - return MI->getOperand(2).getReg(); - } - break; +/// Calculate number of instructions excluding the debug instructions. +static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, + MachineBasicBlock::const_instr_iterator MIE) { + unsigned Count = 0; + for (; MIB != MIE; ++MIB) { + if (!MIB->isDebugValue()) + ++Count; } - return 0; + return Count; } -// Find the hardware loop instruction used to set-up the specified loop. -// On Hexagon, we have two instructions used to set-up the hardware loop -// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions -// to indicate the end of a loop. -static MachineInstr * -findLoopInstr(MachineBasicBlock *BB, int EndLoopOp, - SmallPtrSet<MachineBasicBlock *, 8> &Visited) { + +/// Find the hardware loop instruction used to set-up the specified loop. +/// On Hexagon, we have two instructions used to set-up the hardware loop +/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions +/// to indicate the end of a loop. +static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp, + SmallPtrSet<MachineBasicBlock *, 8> &Visited) { int LOOPi; int LOOPr; if (EndLoopOp == Hexagon::ENDLOOP0) { @@ -157,100 +169,108 @@ findLoopInstr(MachineBasicBlock *BB, int EndLoopOp, return 0; } -unsigned HexagonInstrInfo::InsertBranch( - MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef<MachineOperand> Cond, DebugLoc DL) const { - Opcode_t BOpc = Hexagon::J2_jump; - Opcode_t BccOpc = Hexagon::J2_jumpt; +/// Gather register def/uses from MI. +/// This treats possible (predicated) defs as actually happening ones +/// (conservatively). +static inline void parseOperands(const MachineInstr *MI, + SmallVector<unsigned, 4> &Defs, SmallVector<unsigned, 8> &Uses) { + Defs.clear(); + Uses.clear(); - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); - // Check if ReverseBranchCondition has asked to reverse this branch - // If we want to reverse the branch an odd number of times, we want - // J2_jumpf. - if (!Cond.empty() && Cond[0].isImm()) - BccOpc = Cond[0].getImm(); + if (!MO.isReg()) + continue; - if (!FBB) { - if (Cond.empty()) { - // Due to a bug in TailMerging/CFG Optimization, we need to add a - // special case handling of a predicated jump followed by an - // unconditional jump. If not, Tail Merging and CFG Optimization go - // into an infinite loop. 
- MachineBasicBlock *NewTBB, *NewFBB; - SmallVector<MachineOperand, 4> Cond; - MachineInstr *Term = MBB.getFirstTerminator(); - if (Term != MBB.end() && isPredicated(Term) && - !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) { - MachineBasicBlock *NextBB = - std::next(MachineFunction::iterator(&MBB)); - if (NewTBB == NextBB) { - ReverseBranchCondition(Cond); - RemoveBranch(MBB); - return InsertBranch(MBB, TBB, nullptr, Cond, DL); - } - } - BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); - } else if (isEndLoopN(Cond[0].getImm())) { - int EndLoopOp = Cond[0].getImm(); - assert(Cond[1].isMBB()); - // Since we're adding an ENDLOOP, there better be a LOOP instruction. - // Check for it, and change the BB target if needed. - SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs; - MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs); - assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP"); - Loop->getOperand(0).setMBB(TBB); - // Add the ENDLOOP after the finding the LOOP0. - BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB); - } else if (isNewValueJump(Cond[0].getImm())) { - assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump"); - // New value jump - // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset) - // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset) - unsigned Flags1 = getUndefRegState(Cond[1].isUndef()); - DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber();); - if (Cond[2].isReg()) { - unsigned Flags2 = getUndefRegState(Cond[2].isUndef()); - BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1). - addReg(Cond[2].getReg(), Flags2).addMBB(TBB); - } else if(Cond[2].isImm()) { - BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1). - addImm(Cond[2].getImm()).addMBB(TBB); - } else - llvm_unreachable("Invalid condition for branching"); - } else { - assert((Cond.size() == 2) && "Malformed cond vector"); - const MachineOperand &RO = Cond[1]; - unsigned Flags = getUndefRegState(RO.isUndef()); - BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB); - } - return 1; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (MO.isUse()) + Uses.push_back(MO.getReg()); + + if (MO.isDef()) + Defs.push_back(MO.getReg()); } - assert((!Cond.empty()) && - "Cond. cannot be empty when multiple branchings are required"); - assert((!isNewValueJump(Cond[0].getImm())) && - "NV-jump cannot be inserted with another branch"); - // Special case for hardware loops. The condition is a basic block. - if (isEndLoopN(Cond[0].getImm())) { - int EndLoopOp = Cond[0].getImm(); - assert(Cond[1].isMBB()); - // Since we're adding an ENDLOOP, there better be a LOOP instruction. - // Check for it, and change the BB target if needed. - SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs; - MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs); - assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP"); - Loop->getOperand(0).setMBB(TBB); - // Add the ENDLOOP after the finding the LOOP0. - BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB); - } else { - const MachineOperand &RO = Cond[1]; - unsigned Flags = getUndefRegState(RO.isUndef()); - BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB); +} + + +// Position dependent, so check twice for swap. 
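To make the "check twice for swap" remark concrete: the group-compatibility relation computed by isDuplexPairMatch (the function that follows) is intentionally asymmetric, e.g. an HSIG_L1 sub-instruction pairs with an HSIG_A one but not vice versa. A hypothetical caller, not part of this commit, would therefore probe both slot orders:

// Hypothetical wrapper: a duplex can be formed if the two sub-instruction
// groups match in either order.
static bool canPairAsDuplex(unsigned Ga, unsigned Gb) {
  return isDuplexPairMatch(Ga, Gb) || isDuplexPairMatch(Gb, Ga);
}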
+static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { + switch (Ga) { + case HexagonII::HSIG_None: + default: + return false; + case HexagonII::HSIG_L1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_L2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_A: + return (Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_Compound: + return (Gb == HexagonII::HSIG_Compound); } - BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); + return false; +} - return 2; + + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case Hexagon::L2_loadri_io: + case Hexagon::L2_loadrd_io: + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: + if (MI->getOperand(2).isFI() && + MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerd_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerb_io: + if (MI->getOperand(2).isFI() && + MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { + FrameIndex = MI->getOperand(0).getIndex(); + return MI->getOperand(2).getReg(); + } + break; + } + return 0; } @@ -269,9 +289,6 @@ unsigned HexagonInstrInfo::InsertBranch( /// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode /// Cond[1] = R /// Cond[2] = Imm -/// @note Related function is \fn findInstrPredicate which fills in -/// Cond. vector when a predicated instruction is passed to it. -/// We follow same protocol in that case too. /// bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, @@ -314,7 +331,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return false; --I; } - + bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump && I->getOperand(0).isMBB(); // Delete the J2_jump if it's equivalent to a fall-through. @@ -327,17 +344,17 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return false; --I; } - if (!isUnpredicatedTerminator(I)) + if (!isUnpredicatedTerminator(&*I)) return false; // Get the last instruction in the block. 
- MachineInstr *LastInst = I; + MachineInstr *LastInst = &*I; MachineInstr *SecondLastInst = nullptr; // Find one more terminator if present. - do { - if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(I)) { + for (;;) { + if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) { if (!SecondLastInst) - SecondLastInst = I; + SecondLastInst = &*I; else // This is a third branch. return true; @@ -345,7 +362,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, if (I == MBB.instr_begin()) break; --I; - } while(I); + } int LastOpcode = LastInst->getOpcode(); int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0; @@ -418,7 +435,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // executed, so remove it. if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) { TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; + I = LastInst->getIterator(); if (AllowModify) I->eraseFromParent(); return false; @@ -438,6 +455,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; } + unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber()); MachineBasicBlock::iterator I = MBB.end(); @@ -458,100 +476,127 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return Count; } -/// \brief For a comparison instruction, return the source registers in -/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it -/// compares against in CmpValue. Return true if the comparison instruction -/// can be analyzed. -bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, - int &Mask, int &Value) const { - unsigned Opc = MI->getOpcode(); - // Set mask and the first source register. - switch (Opc) { - case Hexagon::C2_cmpeq: - case Hexagon::C2_cmpeqp: - case Hexagon::C2_cmpgt: - case Hexagon::C2_cmpgtp: - case Hexagon::C2_cmpgtu: - case Hexagon::C2_cmpgtup: - case Hexagon::C4_cmpneq: - case Hexagon::C4_cmplte: - case Hexagon::C4_cmplteu: - case Hexagon::C2_cmpeqi: - case Hexagon::C2_cmpgti: - case Hexagon::C2_cmpgtui: - case Hexagon::C4_cmpneqi: - case Hexagon::C4_cmplteui: - case Hexagon::C4_cmpltei: - SrcReg = MI->getOperand(1).getReg(); - Mask = ~0; - break; - case Hexagon::A4_cmpbeq: - case Hexagon::A4_cmpbgt: - case Hexagon::A4_cmpbgtu: - case Hexagon::A4_cmpbeqi: - case Hexagon::A4_cmpbgti: - case Hexagon::A4_cmpbgtui: - SrcReg = MI->getOperand(1).getReg(); - Mask = 0xFF; - break; - case Hexagon::A4_cmpheq: - case Hexagon::A4_cmphgt: - case Hexagon::A4_cmphgtu: - case Hexagon::A4_cmpheqi: - case Hexagon::A4_cmphgti: - case Hexagon::A4_cmphgtui: - SrcReg = MI->getOperand(1).getReg(); - Mask = 0xFFFF; - break; - } +unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, DebugLoc DL) const { + unsigned BOpc = Hexagon::J2_jump; + unsigned BccOpc = Hexagon::J2_jumpt; + assert(validateBranchCond(Cond) && "Invalid branching condition"); + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - // Set the value/second source register. 
- switch (Opc) { - case Hexagon::C2_cmpeq: - case Hexagon::C2_cmpeqp: - case Hexagon::C2_cmpgt: - case Hexagon::C2_cmpgtp: - case Hexagon::C2_cmpgtu: - case Hexagon::C2_cmpgtup: - case Hexagon::A4_cmpbeq: - case Hexagon::A4_cmpbgt: - case Hexagon::A4_cmpbgtu: - case Hexagon::A4_cmpheq: - case Hexagon::A4_cmphgt: - case Hexagon::A4_cmphgtu: - case Hexagon::C4_cmpneq: - case Hexagon::C4_cmplte: - case Hexagon::C4_cmplteu: - SrcReg2 = MI->getOperand(2).getReg(); - return true; + // Check if ReverseBranchCondition has asked to reverse this branch. + // If we want to reverse the branch an odd number of times, we want + // J2_jumpf. + if (!Cond.empty() && Cond[0].isImm()) + BccOpc = Cond[0].getImm(); - case Hexagon::C2_cmpeqi: - case Hexagon::C2_cmpgtui: - case Hexagon::C2_cmpgti: - case Hexagon::C4_cmpneqi: - case Hexagon::C4_cmplteui: - case Hexagon::C4_cmpltei: - case Hexagon::A4_cmpbeqi: - case Hexagon::A4_cmpbgti: - case Hexagon::A4_cmpbgtui: - case Hexagon::A4_cmpheqi: - case Hexagon::A4_cmphgti: - case Hexagon::A4_cmphgtui: - SrcReg2 = 0; - Value = MI->getOperand(2).getImm(); - return true; + if (!FBB) { + if (Cond.empty()) { + // Due to a bug in TailMerging/CFG Optimization, we need to add a + // special case handling of a predicated jump followed by an + // unconditional jump. If not, Tail Merging and CFG Optimization go + // into an infinite loop. + MachineBasicBlock *NewTBB, *NewFBB; + SmallVector<MachineOperand, 4> Cond; + MachineInstr *Term = MBB.getFirstTerminator(); + if (Term != MBB.end() && isPredicated(Term) && + !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) { + MachineBasicBlock *NextBB = &*++MBB.getIterator(); + if (NewTBB == NextBB) { + ReverseBranchCondition(Cond); + RemoveBranch(MBB); + return InsertBranch(MBB, TBB, nullptr, Cond, DL); + } + } + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); + } else if (isEndLoopN(Cond[0].getImm())) { + int EndLoopOp = Cond[0].getImm(); + assert(Cond[1].isMBB()); + // Since we're adding an ENDLOOP, there better be a LOOP instruction. + // Check for it, and change the BB target if needed. + SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs; + MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs); + assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP"); + Loop->getOperand(0).setMBB(TBB); + // Add the ENDLOOP after finding the LOOP0. + BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB); + } else if (isNewValueJump(Cond[0].getImm())) { + assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump"); + // New value jump + // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset) + // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset) + unsigned Flags1 = getUndefRegState(Cond[1].isUndef()); + DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber();); + if (Cond[2].isReg()) { + unsigned Flags2 = getUndefRegState(Cond[2].isUndef()); + BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1). + addReg(Cond[2].getReg(), Flags2).addMBB(TBB); + } else if (Cond[2].isImm()) { + BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1). + addImm(Cond[2].getImm()).addMBB(TBB); + } else + llvm_unreachable("Invalid condition for branching"); + } else { + assert((Cond.size() == 2) && "Malformed cond vector"); + const MachineOperand &RO = Cond[1]; + unsigned Flags = getUndefRegState(RO.isUndef()); + BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB); + } + return 1; } + assert((!Cond.empty()) && + "Cond.
cannot be empty when multiple branchings are required"); + assert((!isNewValueJump(Cond[0].getImm())) && + "NV-jump cannot be inserted with another branch"); + // Special case for hardware loops. The condition is a basic block. + if (isEndLoopN(Cond[0].getImm())) { + int EndLoopOp = Cond[0].getImm(); + assert(Cond[1].isMBB()); + // Since we're adding an ENDLOOP, there better be a LOOP instruction. + // Check for it, and change the BB target if needed. + SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs; + MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs); + assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP"); + Loop->getOperand(0).setMBB(TBB); + // Add the ENDLOOP after finding the LOOP0. + BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB); + } else { + const MachineOperand &RO = Cond[1]; + unsigned Flags = getUndefRegState(RO.isUndef()); + BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB); + } + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); - return false; + return 2; +} + + +bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + BranchProbability Probability) const { + return nonDbgBBSize(&MBB) <= 3; +} + + +bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, + unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability) + const { + return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3; +} + + +bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumInstrs, BranchProbability Probability) const { + return NumInstrs <= 4; } void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { + MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { + auto &HRI = getRegisterInfo(); if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) { BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg).addReg(SrcReg); return; @@ -599,28 +644,74 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, addReg(SrcReg, getKillRegState(KillSrc)); return; } + if (Hexagon::PredRegsRegClass.contains(SrcReg) && + Hexagon::IntRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::VectorRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::V6_vassign), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::VecDblRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg). + addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), + getKillRegState(KillSrc)). + addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), + getKillRegState(KillSrc)); + return; + } + if (Hexagon::VecPredRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DestReg). + addReg(SrcReg).
+ addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::VecPredRegsRegClass.contains(SrcReg) && + Hexagon::VectorRegsRegClass.contains(DestReg)) { + llvm_unreachable("Unimplemented pred to vec"); + return; + } + if (Hexagon::VecPredRegsRegClass.contains(DestReg) && + Hexagon::VectorRegsRegClass.contains(SrcReg)) { + llvm_unreachable("Unimplemented vec to pred"); + return; + } + if (Hexagon::VecPredRegs128BRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), + HRI.getSubReg(DestReg, Hexagon::subreg_hireg)). + addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), + getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), + HRI.getSubReg(DestReg, Hexagon::subreg_loreg)). + addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), + getKillRegState(KillSrc)); + return; + } +#ifndef NDEBUG + // Show the invalid registers to ease debugging. + dbgs() << "Invalid registers for copy in BB#" << MBB.getNumber() + << ": " << PrintReg(DestReg, &HRI) + << " = " << PrintReg(SrcReg, &HRI) << '\n'; +#endif llvm_unreachable("Unimplemented"); } -void HexagonInstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - +void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { DebugLoc DL = MBB.findDebugLoc(I); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - Align); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), Align); if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io)) @@ -640,33 +731,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } -void HexagonInstrInfo::storeRegToAddr( - MachineFunction &MF, unsigned SrcReg, - bool isKill, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const -{ - llvm_unreachable("Unimplemented"); -} - - -void HexagonInstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, unsigned DestReg, int FI, + const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { DebugLoc DL = MBB.findDebugLoc(I); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - Align); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), Align); if (RC == &Hexagon::IntRegsRegClass) { BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); @@ 
-682,27 +757,136 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } -void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const { - llvm_unreachable("Unimplemented"); -} -bool -HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { - const HexagonRegisterInfo &TRI = getRegisterInfo(); +/// expandPostRAPseudo - This function is called for all pseudo instructions +/// that remain after register allocation. Many pseudo instructions are +/// created to help register allocation. This is the place to convert them +/// into real instructions. The target can edit MI in place, or it can insert +/// new instructions and erase MI. The function should return true if +/// anything was changed. +bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) + const { + const HexagonRegisterInfo &HRI = getRegisterInfo(); MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); MachineBasicBlock &MBB = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); unsigned Opc = MI->getOpcode(); + const unsigned VecOffset = 1; + bool Is128B = false; switch (Opc) { case Hexagon::ALIGNA: BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg()) - .addReg(TRI.getFrameRegister()) + .addReg(HRI.getFrameRegister()) .addImm(-MI->getOperand(1).getImm()); MBB.erase(MI); return true; + case Hexagon::HEXAGON_V6_vassignp_128B: + case Hexagon::HEXAGON_V6_vassignp: { + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + if (SrcReg != DstReg) + copyPhysReg(MBB, MI, DL, DstReg, SrcReg, MI->getOperand(1).isKill()); + MBB.erase(MI); + return true; + } + case Hexagon::HEXAGON_V6_lo_128B: + case Hexagon::HEXAGON_V6_lo: { + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg); + copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI->getOperand(1).isKill()); + MBB.erase(MI); + MRI.clearKillFlags(SrcSubLo); + return true; + } + case Hexagon::HEXAGON_V6_hi_128B: + case Hexagon::HEXAGON_V6_hi: { + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg); + copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI->getOperand(1).isKill()); + MBB.erase(MI); + MRI.clearKillFlags(SrcSubHi); + return true; + } + case Hexagon::STrivv_indexed_128B: + Is128B = true; + case Hexagon::STrivv_indexed: { + unsigned SrcReg = MI->getOperand(2).getReg(); + unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg); + unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg); + unsigned NewOpcd = Is128B ? Hexagon::V6_vS32b_ai_128B + : Hexagon::V6_vS32b_ai; + unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6; + MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpcd)) + .addOperand(MI->getOperand(0)) + .addImm(MI->getOperand(1).getImm()) + .addReg(SrcSubLo) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MI1New->getOperand(0).setIsKill(false); + BuildMI(MBB, MI, DL, get(NewOpcd)) + .addOperand(MI->getOperand(0)) + // The Vectors are indexed in multiples of vector size. 
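+      // (With VecOffset = 1 that is 64 bytes, or 128 bytes in the _128B
+      // variant, so the high half lands one vector past the low half.)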
+ .addImm(MI->getOperand(1).getImm()+Offset) + .addReg(SrcSubHi) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MBB.erase(MI); + return true; + } + case Hexagon::LDrivv_pseudo_V6_128B: + case Hexagon::LDrivv_indexed_128B: + Is128B = true; + case Hexagon::LDrivv_pseudo_V6: + case Hexagon::LDrivv_indexed: { + unsigned NewOpcd = Is128B ? Hexagon::V6_vL32b_ai_128B + : Hexagon::V6_vL32b_ai; + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6; + MachineInstr *MI1New = + BuildMI(MBB, MI, DL, get(NewOpcd), + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) + .addOperand(MI->getOperand(1)) + .addImm(MI->getOperand(2).getImm()); + MI1New->getOperand(1).setIsKill(false); + BuildMI(MBB, MI, DL, get(NewOpcd), + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) + .addOperand(MI->getOperand(1)) + // The Vectors are indexed in multiples of vector size. + .addImm(MI->getOperand(2).getImm() + Offset) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MBB.erase(MI); + return true; + } + case Hexagon::LDriv_pseudo_V6_128B: + Is128B = true; + case Hexagon::LDriv_pseudo_V6: { + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B + : Hexagon::V6_vL32b_ai; + int32_t Off = MI->getOperand(2).getImm(); + int32_t Idx = Off; + BuildMI(MBB, MI, DL, get(NewOpc), DstReg) + .addOperand(MI->getOperand(1)) + .addImm(Idx) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MBB.erase(MI); + return true; + } + case Hexagon::STriv_pseudo_V6_128B: + Is128B = true; + case Hexagon::STriv_pseudo_V6: { + unsigned NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B + : Hexagon::V6_vS32b_ai; + int32_t Off = MI->getOperand(1).getImm(); + int32_t Idx = Is128B ? (Off >> 7) : (Off >> 6); + BuildMI(MBB, MI, DL, get(NewOpc)) + .addOperand(MI->getOperand(0)) + .addImm(Idx) + .addOperand(MI->getOperand(2)) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MBB.erase(MI); + return true; + } case Hexagon::TFR_PdTrue: { unsigned Reg = MI->getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg) @@ -724,15 +908,15 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { unsigned DstReg = MI->getOperand(0).getReg(); unsigned Src1Reg = MI->getOperand(1).getReg(); unsigned Src2Reg = MI->getOperand(2).getReg(); - unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); - unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); - unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); - unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); + unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); + unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); + unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), - TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) .addReg(Src2SubHi); BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), - TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) .addReg(Src2SubLo); MBB.erase(MI); MRI.clearKillFlags(Src1SubHi); @@ -747,17 +931,17 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { unsigned Src1Reg = MI->getOperand(1).getReg(); 
unsigned Src2Reg = MI->getOperand(2).getReg(); unsigned Src3Reg = MI->getOperand(3).getReg(); - unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); - unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); - unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); - unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); - unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg); - unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg); + unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); + unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); + unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); + unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::subreg_hireg); + unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::subreg_loreg); BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), - TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) .addReg(Src2SubHi).addReg(Src3SubHi); BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), - TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) .addReg(Src2SubLo).addReg(Src3SubLo); MBB.erase(MI); MRI.clearKillFlags(Src1SubHi); @@ -768,104 +952,168 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { MRI.clearKillFlags(Src3SubLo); return true; } + case Hexagon::MUX64_rr: { + const MachineOperand &Op0 = MI->getOperand(0); + const MachineOperand &Op1 = MI->getOperand(1); + const MachineOperand &Op2 = MI->getOperand(2); + const MachineOperand &Op3 = MI->getOperand(3); + unsigned Rd = Op0.getReg(); + unsigned Pu = Op1.getReg(); + unsigned Rs = Op2.getReg(); + unsigned Rt = Op3.getReg(); + DebugLoc DL = MI->getDebugLoc(); + unsigned K1 = getKillRegState(Op1.isKill()); + unsigned K2 = getKillRegState(Op2.isKill()); + unsigned K3 = getKillRegState(Op3.isKill()); + if (Rd != Rs) + BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpt), Rd) + .addReg(Pu, (Rd == Rt) ? K1 : 0) + .addReg(Rs, K2); + if (Rd != Rt) + BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpf), Rd) + .addReg(Pu, K1) + .addReg(Rt, K3); + MBB.erase(MI); + return true; + } case Hexagon::TCRETURNi: MI->setDesc(get(Hexagon::J2_jump)); return true; case Hexagon::TCRETURNr: MI->setDesc(get(Hexagon::J2_jumpr)); return true; + case Hexagon::TFRI_f: + case Hexagon::TFRI_cPt_f: + case Hexagon::TFRI_cNotPt_f: { + unsigned Opx = (Opc == Hexagon::TFRI_f) ? 1 : 2; + APFloat FVal = MI->getOperand(Opx).getFPImm()->getValueAPF(); + APInt IVal = FVal.bitcastToAPInt(); + MI->RemoveOperand(Opx); + unsigned NewOpc = (Opc == Hexagon::TFRI_f) ? Hexagon::A2_tfrsi : + (Opc == Hexagon::TFRI_cPt_f) ? Hexagon::C2_cmoveit : + Hexagon::C2_cmoveif; + MI->setDesc(get(NewOpc)); + MI->addOperand(MachineOperand::CreateImm(IVal.getZExtValue())); + return true; + } } return false; } -MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl( - MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, int FI) const { - // Hexagon_TODO: Implement. - return nullptr; + +// We indicate that we want to reverse the branch by +// inserting the reversed branching opcode. 
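+// For example, a Cond vector carrying J2_jumpt comes back carrying J2_jumpf.
+// InsertBranch then emits whatever opcode Cond[0] holds, so callers need no
+// extra bookkeeping beyond this round trip (illustrative summary).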
+bool HexagonInstrInfo::ReverseBranchCondition( + SmallVectorImpl<MachineOperand> &Cond) const { + if (Cond.empty()) + return true; + assert(Cond[0].isImm() && "First entry in the cond vector not imm-val"); + unsigned opcode = Cond[0].getImm(); + assert(get(opcode).isBranch() && "Should be a branching condition."); + if (isEndLoopN(opcode)) + return true; + unsigned NewOpcode = getInvertedPredicatedOpcode(opcode); + Cond[0].setImm(NewOpcode); + return false; } -unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - const TargetRegisterClass *TRC; - if (VT == MVT::i1) { - TRC = &Hexagon::PredRegsRegClass; - } else if (VT == MVT::i32 || VT == MVT::f32) { - TRC = &Hexagon::IntRegsRegClass; - } else if (VT == MVT::i64 || VT == MVT::f64) { - TRC = &Hexagon::DoubleRegsRegClass; - } else { - llvm_unreachable("Cannot handle this register class"); - } +void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + DebugLoc DL; + BuildMI(MBB, MI, DL, get(Hexagon::A2_nop)); +} - unsigned NewReg = RegInfo.createVirtualRegister(TRC); - return NewReg; + +// Returns true if an instruction is predicated irrespective of the predicate +// sense. For example, all of the following will return true. +// if (p0) R1 = add(R2, R3) +// if (!p0) R1 = add(R2, R3) +// if (p0.new) R1 = add(R2, R3) +// if (!p0.new) R1 = add(R2, R3) +// Note: New-value stores are not included here as in the current +// implementation, we don't need to check their predicate sense. +bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; } -bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const { - const MCInstrDesc &MID = MI->getDesc(); - const uint64_t F = MID.TSFlags; - if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask) - return true; - // TODO: This is largely obsolete now. Will need to be removed - // in consecutive patches. - switch(MI->getOpcode()) { - // TFR_FI Remains a special case. - case Hexagon::TFR_FI: - return true; - default: - return false; +bool HexagonInstrInfo::PredicateInstruction(MachineInstr *MI, + ArrayRef<MachineOperand> Cond) const { + if (Cond.empty() || isNewValueJump(Cond[0].getImm()) || + isEndLoopN(Cond[0].getImm())) { + DEBUG(dbgs() << "\nCannot predicate:"; MI->dump();); + return false; } - return false; -} + int Opc = MI->getOpcode(); + assert(isPredicable(MI) && "Expected predicable instruction"); + bool invertJump = predOpcodeHasNot(Cond); -// This returns true in two cases: -// - The OP code itself indicates that this is an extended instruction. -// - One of MOs has been marked with HMOTF_ConstExtended flag. -bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { - // First check if this is permanently extended op code. - const uint64_t F = MI->getDesc().TSFlags; - if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask) - return true; - // Use MO operand flags to determine if one of MI's operands - // has HMOTF_ConstExtended flag set. - for (MachineInstr::const_mop_iterator I = MI->operands_begin(), - E = MI->operands_end(); I != E; ++I) { - if (I->getTargetFlags() && HexagonII::HMOTF_ConstExtended) - return true; + // We have to predicate MI "in place", i.e. after this function returns, + // MI will need to be transformed into a predicated form.
To avoid com- + plicated manipulations with the operands (handling tied operands, + // etc.), build a new temporary instruction, then overwrite MI with it. + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned PredOpc = getCondOpcode(Opc, invertJump); + MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc)); + unsigned NOp = 0, NumOps = MI->getNumOperands(); + while (NOp < NumOps) { + MachineOperand &Op = MI->getOperand(NOp); + if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) + break; + T.addOperand(Op); + NOp++; } -bool HexagonInstrInfo::isBranch (const MachineInstr *MI) const { - return MI->getDesc().isBranch(); -} + unsigned PredReg, PredRegPos, PredRegFlags; + bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags); + (void)GotPredReg; + assert(GotPredReg); + T.addReg(PredReg, PredRegFlags); + while (NOp < NumOps) + T.addOperand(MI->getOperand(NOp++)); -bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const { - if (isNewValueJump(MI)) - return true; + MI->setDesc(get(PredOpc)); + while (unsigned n = MI->getNumOperands()) + MI->RemoveOperand(n-1); + for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i) + MI->addOperand(T->getOperand(i)); - if (isNewValueStore(MI)) - return true; + MachineBasicBlock::instr_iterator TI = T->getIterator(); + B.erase(TI); - return false; + MachineRegisterInfo &MRI = B.getParent()->getRegInfo(); + MRI.clearKillFlags(PredReg); + return true; } -bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; - return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); -} -bool HexagonInstrInfo::isNewValue(Opcode_t Opcode) const { - const uint64_t F = get(Opcode).TSFlags; - return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, + ArrayRef<MachineOperand> Pred2) const { + // TODO: Fix this + return false; } -bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { - return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4; + +bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + auto &HRI = getRegisterInfo(); + for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) { + MachineOperand MO = MI->getOperand(oper); + if (MO.isReg() && MO.isDef()) { + const TargetRegisterClass* RC = HRI.getMinimalPhysRegClass(MO.getReg()); + if (RC == &Hexagon::PredRegsRegClass) { + Pred.push_back(MO); + return true; + } + } + } + return false; } bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { @@ -875,10 +1123,21 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { return false; const int Opc = MI->getOpcode(); + int NumOperands = MI->getNumOperands(); + + // Keep a flag for up to 4 operands in the instruction, to indicate if + // that operand has been constant extended.
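+  // For instance, in the S4_storeir*_io cases below, operands 1 (offset) and
+  // 2 (value) pass their range checks whenever they are constant extended.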
+ bool OpCExtended[4]; + if (NumOperands > 4) + NumOperands = 4; + + for (int i = 0; i < NumOperands; i++) + OpCExtended[i] = (isOperandExtended(MI, i) && isConstExtended(MI)); switch(Opc) { case Hexagon::A2_tfrsi: - return (isOperandExtended(MI, 1) && isConstExtended(MI)) || isInt<12>(MI->getOperand(1).getImm()); + return (isOperandExtended(MI, 1) && isConstExtended(MI)) || + isInt<12>(MI->getOperand(1).getImm()); case Hexagon::S2_storerd_io: return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); @@ -926,8 +1185,8 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { case Hexagon::S4_storeirb_io: case Hexagon::S4_storeirh_io: case Hexagon::S4_storeiri_io: - return (isUInt<6>(MI->getOperand(1).getImm()) && - isInt<6>(MI->getOperand(2).getImm())); + return (OpCExtended[1] || isUInt<6>(MI->getOperand(1).getImm())) && + (OpCExtended[2] || isInt<6>(MI->getOperand(2).getImm())); case Hexagon::A2_addi: return isInt<8>(MI->getOperand(2).getImm()); @@ -944,269 +1203,1117 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { return true; } -// This function performs the following inversiones: -// -// cPt ---> cNotPt -// cNotPt ---> cPt -// -unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { - int InvPredOpcode; - InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc) - : Hexagon::getTruePredOpcode(Opc); - if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate. - return InvPredOpcode; - switch(Opc) { - default: llvm_unreachable("Unexpected predicated instruction"); - case Hexagon::C2_ccombinewt: - return Hexagon::C2_ccombinewf; +bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, const MachineFunction &MF) const { + // Debug info is never a scheduling boundary. It's necessary to be explicit + // due to the special treatment of IT instructions below, otherwise a + // dbg_value followed by an IT will result in the IT instruction being + // considered a scheduling hazard, which is wrong. It should be the actual + // instruction preceding the dbg_value instruction(s), just like it is + // when debug info is not present. + if (MI->isDebugValue()) + return false; + + // Throwing call is a boundary. + if (MI->isCall()) { + // If any of the block's successors is a landing pad, this could be a + // throwing call. + for (auto I : MBB->successors()) + if (I->isEHPad()) + return true; + } + + // Don't mess around with no return calls. + if (MI->getOpcode() == Hexagon::CALLv3nr) + return true; + + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isPosition()) + return true; + + if (MI->isInlineAsm() && !ScheduleInlineAsm) + return true; + + return false; +} + + +/// Measure the specified inline asm to determine an approximation of its +/// length. +/// Comments (which run till the next SeparatorString or newline) do not +/// count as an instruction. +/// Any other non-whitespace text is considered an instruction, with +/// multiple instructions separated by SeparatorString or newlines. +/// Variable-length instructions are not handled here; this function +/// may be overloaded in the target code to do that. +/// Hexagon counts the number of ##'s and adjusts for that many +/// constant extenders. +unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const { + StringRef AStr(Str); + // Count the number of instructions in the asm.
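+  // Rough example: an asm string with two newline-separated instructions,
+  // one of them using "##", is estimated at 2 * MaxInstLength + 4 bytes.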
+ bool atInsnStart = true; + unsigned Length = 0; + for (; *Str; ++Str) { + if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), + strlen(MAI.getSeparatorString())) == 0) + atInsnStart = true; + if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) { + Length += MAI.getMaxInstLength(); + atInsnStart = false; + } + if (atInsnStart && strncmp(Str, MAI.getCommentString(), + strlen(MAI.getCommentString())) == 0) + atInsnStart = false; + } + + // Add to size number of constant extenders seen * 4. + StringRef Occ("##"); + Length += AStr.count(Occ)*4; + return Length; +} + + +ScheduleHazardRecognizer* +HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( + const InstrItineraryData *II, const ScheduleDAG *DAG) const { + return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); +} + + +/// \brief For a comparison instruction, return the source registers in +/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it +/// compares against in CmpValue. Return true if the comparison instruction +/// can be analyzed. +bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const { + unsigned Opc = MI->getOpcode(); + + // Set mask and the first source register. + switch (Opc) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtp: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtup: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + case Hexagon::C4_cmpneqi: + case Hexagon::C4_cmplteui: + case Hexagon::C4_cmpltei: + SrcReg = MI->getOperand(1).getReg(); + Mask = ~0; + break; + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgti: + case Hexagon::A4_cmpbgtui: + SrcReg = MI->getOperand(1).getReg(); + Mask = 0xFF; + break; + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmphgt: + case Hexagon::A4_cmphgtu: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgti: + case Hexagon::A4_cmphgtui: + SrcReg = MI->getOperand(1).getReg(); + Mask = 0xFFFF; + break; + } + + // Set the value/second source register. 
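+  // E.g. a byte compare against an immediate, A4_cmpbeqi (roughly
+  // "p0 = cmpb.eq(r1, #7)"), arrives here with Mask = 0xFF; the switch below
+  // then sets SrcReg2 = 0 and Value = 7.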
+ switch (Opc) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtp: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtup: + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmphgt: + case Hexagon::A4_cmphgtu: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + SrcReg2 = MI->getOperand(2).getReg(); + return true; + + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgtui: + case Hexagon::C2_cmpgti: + case Hexagon::C4_cmpneqi: + case Hexagon::C4_cmplteui: + case Hexagon::C4_cmpltei: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgti: + case Hexagon::A4_cmpbgtui: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgti: + case Hexagon::A4_cmphgtui: + SrcReg2 = 0; + Value = MI->getOperand(2).getImm(); + return true; + } + + return false; +} + + +unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, unsigned *PredCost) const { + return getInstrTimingClassLatency(ItinData, MI); +} + + +DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( + const TargetSubtargetInfo &STI) const { + const InstrItineraryData *II = STI.getInstrItineraryData(); + return static_cast<const HexagonSubtarget&>(STI).createDFAPacketizer(II); +} + + +// Inspired by this pair: +// %R13<def> = L2_loadri_io %R29, 136; mem:LD4[FixedStack0] +// S2_storeri_io %R29, 132, %R1<kill>; flags: mem:ST4[FixedStack1] +// Currently AA considers the addresses in these instructions to be aliasing. +bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, + MachineInstr *MIb, AliasAnalysis *AA) const { + int OffsetA = 0, OffsetB = 0; + unsigned SizeA = 0, SizeB = 0; + + if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() || + MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef()) + return false; + + // Instructions that are pure loads, not loads-and-stores like memops, are + // not dependent. + if (MIa->mayLoad() && !isMemOp(MIa) && MIb->mayLoad() && !isMemOp(MIb)) + return true; + + // Get base, offset, and access size in MIa. + unsigned BaseRegA = getBaseAndOffset(MIa, OffsetA, SizeA); + if (!BaseRegA || !SizeA) + return false; + + // Get base, offset, and access size in MIb. + unsigned BaseRegB = getBaseAndOffset(MIb, OffsetB, SizeB); + if (!BaseRegB || !SizeB) + return false; + + if (BaseRegA != BaseRegB) + return false; + + // This is a mem access with the same base register and known offsets from it. + // Reason about it.
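+  // The accesses [OffsetA, OffsetA+SizeA) and [OffsetB, OffsetB+SizeB) off
+  // the shared base are disjoint exactly when the lower one ends at or before
+  // the higher one begins, which is what the checks below verify.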
+ if (OffsetA > OffsetB) { + uint64_t offDiff = (uint64_t)((int64_t)OffsetA - (int64_t)OffsetB); + return (SizeB <= offDiff); + } else if (OffsetA < OffsetB) { + uint64_t offDiff = (uint64_t)((int64_t)OffsetB - (int64_t)OffsetA); + return (SizeA <= offDiff); + } + + return false; +} + + +unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetRegisterClass *TRC; + if (VT == MVT::i1) { + TRC = &Hexagon::PredRegsRegClass; + } else if (VT == MVT::i32 || VT == MVT::f32) { + TRC = &Hexagon::IntRegsRegClass; + } else if (VT == MVT::i64 || VT == MVT::f64) { + TRC = &Hexagon::DoubleRegsRegClass; + } else { + llvm_unreachable("Cannot handle this register class"); + } + + unsigned NewReg = MRI.createVirtualRegister(TRC); + return NewReg; +} + + +bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr* MI) const { + return (getAddrMode(MI) == HexagonII::AbsoluteSet); +} + + +bool HexagonInstrInfo::isAccumulator(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask); +} + + +bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const { + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + + if (!(isTC1(MI)) + && !(QII->isTC2Early(MI)) + && !(MI->getDesc().mayLoad()) + && !(MI->getDesc().mayStore()) + && (MI->getDesc().getOpcode() != Hexagon::S2_allocframe) + && (MI->getDesc().getOpcode() != Hexagon::L2_deallocframe) + && !(QII->isMemOp(MI)) + && !(MI->isBranch()) + && !(MI->isReturn()) + && !MI->isCall()) + return true; + + return false; +} + + +// Return true if the instruction is a compound branch instruction. +bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const { + return (getType(MI) == HexagonII::TypeCOMPOUND && MI->isBranch()); +} + + +bool HexagonInstrInfo::isCondInst(const MachineInstr *MI) const { + return (MI->isBranch() && isPredicated(MI)) || + isConditionalTransfer(MI) || + isConditionalALU32(MI) || + isConditionalLoad(MI) || + // Predicated stores which don't have a .new on any operands.
+ (MI->mayStore() && isPredicated(MI) && !isNewValueStore(MI) && + !isPredicatedNew(MI)); +} + + +bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + case Hexagon::A2_paddf: + case Hexagon::A2_paddfnew: + case Hexagon::A2_paddif: + case Hexagon::A2_paddifnew: + case Hexagon::A2_paddit: + case Hexagon::A2_padditnew: + case Hexagon::A2_paddt: + case Hexagon::A2_paddtnew: + case Hexagon::A2_pandf: + case Hexagon::A2_pandfnew: + case Hexagon::A2_pandt: + case Hexagon::A2_pandtnew: + case Hexagon::A2_porf: + case Hexagon::A2_porfnew: + case Hexagon::A2_port: + case Hexagon::A2_portnew: + case Hexagon::A2_psubf: + case Hexagon::A2_psubfnew: + case Hexagon::A2_psubt: + case Hexagon::A2_psubtnew: + case Hexagon::A2_pxorf: + case Hexagon::A2_pxorfnew: + case Hexagon::A2_pxort: + case Hexagon::A2_pxortnew: + case Hexagon::A4_paslhf: + case Hexagon::A4_paslhfnew: + case Hexagon::A4_paslht: + case Hexagon::A4_paslhtnew: + case Hexagon::A4_pasrhf: + case Hexagon::A4_pasrhfnew: + case Hexagon::A4_pasrht: + case Hexagon::A4_pasrhtnew: + case Hexagon::A4_psxtbf: + case Hexagon::A4_psxtbfnew: + case Hexagon::A4_psxtbt: + case Hexagon::A4_psxtbtnew: + case Hexagon::A4_psxthf: + case Hexagon::A4_psxthfnew: + case Hexagon::A4_psxtht: + case Hexagon::A4_psxthtnew: + case Hexagon::A4_pzxtbf: + case Hexagon::A4_pzxtbfnew: + case Hexagon::A4_pzxtbt: + case Hexagon::A4_pzxtbtnew: + case Hexagon::A4_pzxthf: + case Hexagon::A4_pzxthfnew: + case Hexagon::A4_pzxtht: + case Hexagon::A4_pzxthtnew: case Hexagon::C2_ccombinewf: - return Hexagon::C2_ccombinewt; + case Hexagon::C2_ccombinewt: + return true; + } + return false; +} + + +// FIXME - Function name and its functionality don't match. +// It should be renamed to hasPredNewOpcode() +bool HexagonInstrInfo::isConditionalLoad(const MachineInstr* MI) const { + if (!MI->getDesc().mayLoad() || !isPredicated(MI)) + return false; + + int PNewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); + // Instruction with valid predicated-new opcode can be promoted to .new. + return PNewOpcode >= 0; +} + - // Dealloc_return. - case Hexagon::L4_return_t: - return Hexagon::L4_return_f; - case Hexagon::L4_return_f: - return Hexagon::L4_return_t; +// Returns true if an instruction is a conditional store. +// +// Note: It doesn't include conditional new-value stores as they can't be +// converted to .new predicate.
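+// E.g. S2_pstorerit_io (roughly "if (p0) memw(r29+#132) = r1") is listed
+// below, while its new-value form "if (p0) memw(r29+#132) = r1.new" is not
+// (asm syntax approximate).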
+bool HexagonInstrInfo::isConditionalStore(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + case Hexagon::S4_pstorerbt_rr: + case Hexagon::S4_pstorerbf_rr: + case Hexagon::S2_pstorerbt_io: + case Hexagon::S2_pstorerbf_io: + case Hexagon::S2_pstorerbt_pi: + case Hexagon::S2_pstorerbf_pi: + case Hexagon::S2_pstorerdt_io: + case Hexagon::S2_pstorerdf_io: + case Hexagon::S4_pstorerdt_rr: + case Hexagon::S4_pstorerdf_rr: + case Hexagon::S2_pstorerdt_pi: + case Hexagon::S2_pstorerdf_pi: + case Hexagon::S2_pstorerht_io: + case Hexagon::S2_pstorerhf_io: + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + case Hexagon::S4_pstorerht_rr: + case Hexagon::S4_pstorerhf_rr: + case Hexagon::S2_pstorerht_pi: + case Hexagon::S2_pstorerhf_pi: + case Hexagon::S2_pstorerit_io: + case Hexagon::S2_pstorerif_io: + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + case Hexagon::S4_pstorerit_rr: + case Hexagon::S4_pstorerif_rr: + case Hexagon::S2_pstorerit_pi: + case Hexagon::S2_pstorerif_pi: + + // V4 global address store before promoting to dot new. + case Hexagon::S4_pstorerdt_abs: + case Hexagon::S4_pstorerdf_abs: + case Hexagon::S4_pstorerbt_abs: + case Hexagon::S4_pstorerbf_abs: + case Hexagon::S4_pstorerht_abs: + case Hexagon::S4_pstorerhf_abs: + case Hexagon::S4_pstorerit_abs: + case Hexagon::S4_pstorerif_abs: + return true; + + // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded + // from the "Conditional Store" list, because a predicated new value store + // would NOT be promoted to a double dot new store. + // This function returns yes for those stores that are predicated but not + // yet promoted to predicate dot new instructions. + } } -// New Value Store instructions. -bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { + +bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::A2_tfrt: + case Hexagon::A2_tfrf: + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmoveif: + case Hexagon::A2_tfrtnew: + case Hexagon::A2_tfrfnew: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmovenewif: + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: + return true; + + default: + return false; + } + return false; +} + + +// TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle +// isFPImm and later getFPImm as well. +bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; + unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; + if (isExtended) // Instruction must be extended. + return true; + + unsigned isExtendable = + (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; + if (!isExtendable) + return false; + + if (MI->isCall()) + return false; + + short ExtOpNum = getCExtOpNum(MI); + const MachineOperand &MO = MI->getOperand(ExtOpNum); + // Use MO operand flags to determine if MO + // has the HMOTF_ConstExtended flag set. + if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) + return true; + // If this is a Machine BB address we are talking about, and it is + // not marked as extended, say so. + if (MO.isMBB()) + return false; + + // We could be using an instruction with an extendable immediate and shoehorn + // a global address into it. If it is a global address it will be constant + // extended. We do this for COMBINE.
+ // We currently only handle isGlobal() because it is the only kind of + // object we are going to end up with here for now. + // In the future we probably should add isSymbol(), etc. + if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() || + MO.isJTI() || MO.isCPI()) + return true; - return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask); + // If the extendable operand is not 'Immediate' type, the instruction should + // have 'isExtended' flag set. + assert(MO.isImm() && "Extendable operand must be Immediate type"); + + int MinValue = getMinValue(MI); + int MaxValue = getMaxValue(MI); + int ImmValue = MO.getImm(); + + return (ImmValue < MinValue || ImmValue > MaxValue); } -bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { + +bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::L4_return : + case Hexagon::L4_return_t : + case Hexagon::L4_return_f : + case Hexagon::L4_return_tnew_pnt : + case Hexagon::L4_return_fnew_pnt : + case Hexagon::L4_return_tnew_pt : + case Hexagon::L4_return_fnew_pt : + return true; + } + return false; +} + + +// Return true when ConsMI uses a register defined by ProdMI. +bool HexagonInstrInfo::isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + const MCInstrDesc &ProdMCID = ProdMI->getDesc(); + if (!ProdMCID.getNumDefs()) + return false; + + auto &HRI = getRegisterInfo(); + + SmallVector<unsigned, 4> DefsA; + SmallVector<unsigned, 4> DefsB; + SmallVector<unsigned, 8> UsesA; + SmallVector<unsigned, 8> UsesB; + + parseOperands(ProdMI, DefsA, UsesA); + parseOperands(ConsMI, DefsB, UsesB); + + for (auto &RegA : DefsA) + for (auto &RegB : UsesB) { + // True data dependency. + if (RegA == RegB) + return true; + + if (Hexagon::DoubleRegsRegClass.contains(RegA)) + for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs) + if (RegB == *SubRegs) + return true; + + if (Hexagon::DoubleRegsRegClass.contains(RegB)) + for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs) + if (RegA == *SubRegs) + return true; + } + + return false; +} + + +// Returns true if the instruction is already a .cur. +bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + case Hexagon::V6_vL32b_cur_pi: + case Hexagon::V6_vL32b_cur_ai: + case Hexagon::V6_vL32b_cur_pi_128B: + case Hexagon::V6_vL32b_cur_ai_128B: + return true; + } + return false; +} + + +// Returns true if any one of the operands is a dot-new +// insn, whether it is a predicated dot-new or a register dot-new. +bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const { + if (isNewValueInst(MI) || + (isPredicated(MI) && isPredicatedNew(MI))) + return true; + + return false; +} + + +/// Symmetrical. See if these two instructions are fit for a duplex pair.
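+/// For instance, two HSIG_A sub-instructions can pair with each other, while
+/// HSIG_Compound pairs only with HSIG_Compound (see the isDuplexPairMatch
+/// table above); both operand orders are tried.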
+bool HexagonInstrInfo::isDuplexPair(const MachineInstr *MIa, + const MachineInstr *MIb) const { + HexagonII::SubInstructionGroup MIaG = getDuplexCandidateGroup(MIa); + HexagonII::SubInstructionGroup MIbG = getDuplexCandidateGroup(MIb); + return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG)); +} + + +bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr *MI) const { + if (!MI) + return false; + + if (MI->mayLoad() || MI->mayStore() || MI->isCompare()) + return true; + + // Multiply + unsigned SchedClass = MI->getDesc().getSchedClass(); + if (SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23) + return true; + return false; +} + + +bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const { + return (Opcode == Hexagon::ENDLOOP0 || + Opcode == Hexagon::ENDLOOP1); +} + + +bool HexagonInstrInfo::isExpr(unsigned OpType) const { + switch(OpType) { + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + return true; + default: + return false; + } +} + + +bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const { + const MCInstrDesc &MID = MI->getDesc(); + const uint64_t F = MID.TSFlags; + if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask) + return true; + + // TODO: This is largely obsolete now. Will need to be removed + // in consecutive patches. + switch(MI->getOpcode()) { + // TFR_FI Remains a special case. + case Hexagon::TFR_FI: + return true; + default: + return false; + } + return false; +} + + +// This returns true in two cases: +// - The OP code itself indicates that this is an extended instruction. +// - One of MOs has been marked with HMOTF_ConstExtended flag. +bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { + // First check if this is permanently extended op code. + const uint64_t F = MI->getDesc().TSFlags; + if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask) + return true; + // Use MO operand flags to determine if one of MI's operands + // has HMOTF_ConstExtended flag set. + for (MachineInstr::const_mop_iterator I = MI->operands_begin(), + E = MI->operands_end(); I != E; ++I) { + if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended) + return true; + } + return false; +} + + +bool HexagonInstrInfo::isFloat(const MachineInstr *MI) const { + unsigned Opcode = MI->getOpcode(); const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::FPPos) & HexagonII::FPMask; +} - +// No V60 HVX VMEM with A_INDIRECT. +bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr *I, + const MachineInstr *J) const { + if (!isV60VectorInstruction(I)) + return false; + if (!I->mayLoad() && !I->mayStore()) + return false; + return J->isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J); } -int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { - enum Hexagon::PredSense inPredSense; - inPredSense = invertPredicate ? Hexagon::PredSense_false : - Hexagon::PredSense_true; - int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense); - if (CondOpcode >= 0) // Valid Conditional opcode/instruction - return CondOpcode; - // This switch case will be removed once all the instructions have been - // modified to use relation maps. - switch(Opc) { - case Hexagon::TFRI_f: - return !invertPredicate ?
Hexagon::TFRI_cPt_f : - Hexagon::TFRI_cNotPt_f; - case Hexagon::A2_combinew: - return !invertPredicate ? Hexagon::C2_ccombinewt : - Hexagon::C2_ccombinewf; +bool HexagonInstrInfo::isIndirectCall(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::J2_callr : + case Hexagon::J2_callrf : + case Hexagon::J2_callrt : + return true; + } + return false; +} - // DEALLOC_RETURN. - case Hexagon::L4_return: - return !invertPredicate ? Hexagon::L4_return_t: - Hexagon::L4_return_f; + +bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::L4_return : + case Hexagon::L4_return_t : + case Hexagon::L4_return_f : + case Hexagon::L4_return_fnew_pnt : + case Hexagon::L4_return_fnew_pt : + case Hexagon::L4_return_tnew_pnt : + case Hexagon::L4_return_tnew_pt : + return true; } - llvm_unreachable("Unexpected predicable instruction"); + return false; } -bool HexagonInstrInfo:: -PredicateInstruction(MachineInstr *MI, - ArrayRef<MachineOperand> Cond) const { - if (Cond.empty() || isEndLoopN(Cond[0].getImm())) { - DEBUG(dbgs() << "\nCannot predicate:"; MI->dump();); +bool HexagonInstrInfo::isJumpR(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::J2_jumpr : + case Hexagon::J2_jumprt : + case Hexagon::J2_jumprf : + case Hexagon::J2_jumprtnewpt : + case Hexagon::J2_jumprfnewpt : + case Hexagon::J2_jumprtnew : + case Hexagon::J2_jumprfnew : + return true; + } + return false; +} + + +// Return true if a given MI can accommodate a given offset. +// Use abs estimate as opposed to the exact number. +// TODO: This will need to be changed to use MC level +// definition of instruction extendable field size. +bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr *MI, + unsigned offset) const { + // This selection of jump instructions matches what + // AnalyzeBranch can parse, plus NVJ. + if (isNewValueJump(MI)) // r9:2 + return isInt<11>(offset); + + switch (MI->getOpcode()) { + // Still missing Jump to address condition on register value. + default: return false; + case Hexagon::J2_jump: // bits<24> dst; // r22:2 + case Hexagon::J2_call: + case Hexagon::CALLv3nr: + return isInt<24>(offset); + case Hexagon::J2_jumpt: //bits<17> dst; // r15:2 + case Hexagon::J2_jumpf: + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumptnewpt: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumpfnewpt: + case Hexagon::J2_callt: + case Hexagon::J2_callf: + return isInt<17>(offset); + case Hexagon::J2_loop0i: + case Hexagon::J2_loop0iext: + case Hexagon::J2_loop0r: + case Hexagon::J2_loop0rext: + case Hexagon::J2_loop1i: + case Hexagon::J2_loop1iext: + case Hexagon::J2_loop1r: + case Hexagon::J2_loop1rext: + return isInt<9>(offset); + // TODO: Add all the compound branches here. Can we do this in Relation model? + case Hexagon::J4_cmpeqi_tp0_jump_nt: + case Hexagon::J4_cmpeqi_tp1_jump_nt: + return isInt<11>(offset); } - int Opc = MI->getOpcode(); - assert (isPredicable(MI) && "Expected predicable instruction"); - bool invertJump = predOpcodeHasNot(Cond); } - // We have to predicate MI "in place", i.e. after this function returns, - // MI will need to be transformed into a predicated form. To avoid com- - // plicated manipulations with the operands (handling tied operands, - // etc.), build a new temporary instruction, then overwrite MI with it.
- MachineBasicBlock &B = *MI->getParent(); - DebugLoc DL = MI->getDebugLoc(); - unsigned PredOpc = getCondOpcode(Opc, invertJump); - MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc)); - unsigned NOp = 0, NumOps = MI->getNumOperands(); - while (NOp < NumOps) { - MachineOperand &Op = MI->getOperand(NOp); - if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) - break; - T.addOperand(Op); - NOp++; +bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI, + const MachineInstr *ESMI) const { + if (!LRMI || !ESMI) + return false; + + bool isLate = isLateResultInstr(LRMI); + bool isEarly = isEarlySourceInstr(ESMI); + + DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- ")); + DEBUG(LRMI->dump()); + DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- ")); + DEBUG(ESMI->dump()); + + if (isLate && isEarly) { + DEBUG(dbgs() << "++Is Late Result feeding Early Source\n"); + return true; } - unsigned PredReg, PredRegPos, PredRegFlags; - bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags); - (void)GotPredReg; - assert(GotPredReg); - T.addReg(PredReg, PredRegFlags); - while (NOp < NumOps) - T.addOperand(MI->getOperand(NOp++)); + return false; +} - MI->setDesc(get(PredOpc)); - while (unsigned n = MI->getNumOperands()) - MI->RemoveOperand(n-1); - for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i) - MI->addOperand(T->getOperand(i)); +bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const { + if (!MI) + return false; - MachineBasicBlock::instr_iterator TI = &*T; - B.erase(TI); + switch (MI->getOpcode()) { + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + case TargetOpcode::PHI: + return false; + default: + break; + } - MachineRegisterInfo &MRI = B.getParent()->getRegInfo(); - MRI.clearKillFlags(PredReg); + unsigned SchedClass = MI->getDesc().getSchedClass(); + + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123: + case Hexagon::Sched::ALU64_tc_1_SLOT23: + case Hexagon::Sched::EXTENDER_tc_1_SLOT0123: + case Hexagon::Sched::S_2op_tc_1_SLOT23: + case Hexagon::Sched::S_3op_tc_1_SLOT23: + case Hexagon::Sched::V2LDST_tc_ld_SLOT01: + case Hexagon::Sched::V2LDST_tc_st_SLOT0: + case Hexagon::Sched::V2LDST_tc_st_SLOT01: + case Hexagon::Sched::V4LDST_tc_ld_SLOT01: + case Hexagon::Sched::V4LDST_tc_st_SLOT0: + case Hexagon::Sched::V4LDST_tc_st_SLOT01: + return false; + } return true; } -bool -HexagonInstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCycles, - unsigned ExtraPredCycles, - const BranchProbability &Probability) const { - return true; +bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr *MI) const { + if (!MI) + return false; + + // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE use a multiply + // resource, but all operands can be received late like an ALU instruction.
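+  // That is, the whole test reduces to a single scheduling class check
+  // against CVI_VX_LATE below.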
+ return MI->getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE; } -bool -HexagonInstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumTCycles, - unsigned ExtraTCycles, - MachineBasicBlock &FMBB, - unsigned NumFCycles, - unsigned ExtraFCycles, - const BranchProbability &Probability) const { - return true; +bool HexagonInstrInfo::isLoopN(const MachineInstr *MI) const { + unsigned Opcode = MI->getOpcode(); + return Opcode == Hexagon::J2_loop0i || + Opcode == Hexagon::J2_loop0r || + Opcode == Hexagon::J2_loop0iext || + Opcode == Hexagon::J2_loop0rext || + Opcode == Hexagon::J2_loop1i || + Opcode == Hexagon::J2_loop1r || + Opcode == Hexagon::J2_loop1iext || + Opcode == Hexagon::J2_loop1rext; +} + + +bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::L4_iadd_memopw_io : + case Hexagon::L4_isub_memopw_io : + case Hexagon::L4_add_memopw_io : + case Hexagon::L4_sub_memopw_io : + case Hexagon::L4_and_memopw_io : + case Hexagon::L4_or_memopw_io : + case Hexagon::L4_iadd_memoph_io : + case Hexagon::L4_isub_memoph_io : + case Hexagon::L4_add_memoph_io : + case Hexagon::L4_sub_memoph_io : + case Hexagon::L4_and_memoph_io : + case Hexagon::L4_or_memoph_io : + case Hexagon::L4_iadd_memopb_io : + case Hexagon::L4_isub_memopb_io : + case Hexagon::L4_add_memopb_io : + case Hexagon::L4_sub_memopb_io : + case Hexagon::L4_and_memopb_io : + case Hexagon::L4_or_memopb_io : + case Hexagon::L4_ior_memopb_io: + case Hexagon::L4_ior_memoph_io: + case Hexagon::L4_ior_memopw_io: + case Hexagon::L4_iand_memopb_io: + case Hexagon::L4_iand_memoph_io: + case Hexagon::L4_iand_memopw_io: + return true; + } + return false; } -// Returns true if an instruction is predicated irrespective of the predicate -// sense. For example, all of the following will return true. 
-// if (p0) R1 = add(R2, R3) -// if (!p0) R1 = add(R2, R3) -// if (p0.new) R1 = add(R2, R3) -// if (!p0.new) R1 = add(R2, R3) -bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); +bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; } -bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { + +bool HexagonInstrInfo::isNewValue(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; +} + - return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); +bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const { + return isNewValueJump(MI) || isNewValueStore(MI); } -bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - assert(isPredicated(MI)); - return (!((F >> HexagonII::PredicatedFalsePos) & - HexagonII::PredicatedFalseMask)); +bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { + return isNewValue(MI) && MI->isBranch(); } -bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { + +bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const { + return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); +} + + +bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; +} + + +bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; +} - // Make sure that the instruction is predicated. - assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); - return (!((F >> HexagonII::PredicatedFalsePos) & - HexagonII::PredicatedFalseMask)); + +// Returns true if a particular operand is extendable for an instruction. +bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, + unsigned OperandNum) const { + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) + == OperandNum; } + +bool HexagonInstrInfo::isPostIncrement(const MachineInstr* MI) const { + return getAddrMode(MI) == HexagonII::PostInc; +} + + bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; - assert(isPredicated(MI)); - return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); + return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } + bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; - assert(isPredicated(Opcode)); - return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); + return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } -// Returns true, if a ST insn can be promoted to a new-value store. 
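// Illustrative sketch (not from the patch): every accessor above follows the
// same TSFlags idiom, shift the descriptor flags right by a field position
// and mask off the field width. A compact version of that decode, using
// made-up positions and masks in place of the HexagonII constants:

#include <cassert>
#include <cstdint>

namespace sketch {
enum : unsigned { NewValuePos = 27, AddrModePos = 41 };
enum : uint64_t { NewValueMask = 0x1, AddrModeMask = 0x7 };

inline bool isNewValue(uint64_t TSFlags) {
  return (TSFlags >> NewValuePos) & NewValueMask;
}
inline unsigned getAddrMode(uint64_t TSFlags) {
  return (TSFlags >> AddrModePos) & AddrModeMask;
}
} // namespace sketch

int main() {
  uint64_t F = (UINT64_C(1) << sketch::NewValuePos) |
               (UINT64_C(5) << sketch::AddrModePos);
  assert(sketch::isNewValue(F));
  assert(sketch::getAddrMode(F) == 5);
}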
-bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const { + +bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; + return !((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask); +} + - return ((F >> HexagonII::mayNVStorePos) & - HexagonII::mayNVStoreMask); +bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + // Make sure that the instruction is predicated. + assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); + return !((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask); } -bool -HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const { - for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) { - MachineOperand MO = MI->getOperand(oper); - if (MO.isReg() && MO.isDef()) { - const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg()); - if (RC == &Hexagon::PredRegsRegClass) { - Pred.push_back(MO); - return true; - } - } + +bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; +} + + +bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask; +} + + +bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + assert(get(Opcode).isBranch() && + (isPredicatedNew(Opcode) || isNewValue(Opcode))); + return (F >> HexagonII::TakenPos) & HexagonII::TakenMask; +} + + +bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { + return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 || + MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT; +} + + +bool HexagonInstrInfo::isSolo(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::SoloPos) & HexagonII::SoloMask; +} + + +bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::STriw_pred : + case Hexagon::LDriw_pred : + return true; + default: + return false; } - return false; } -bool -HexagonInstrInfo:: -SubsumesPredicate(ArrayRef<MachineOperand> Pred1, - ArrayRef<MachineOperand> Pred2) const { - // TODO: Fix this - return false; +// Returns true when SU has a timing class TC1. +bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123: + case Hexagon::Sched::ALU64_tc_1_SLOT23: + case Hexagon::Sched::EXTENDER_tc_1_SLOT0123: + //case Hexagon::Sched::M_tc_1_SLOT23: + case Hexagon::Sched::S_2op_tc_1_SLOT23: + case Hexagon::Sched::S_3op_tc_1_SLOT23: + return true; + + default: + return false; + } } -// -// We indicate that we want to reverse the branch by -// inserting the reversed branching opcode. 
-// -bool HexagonInstrInfo::ReverseBranchCondition( - SmallVectorImpl<MachineOperand> &Cond) const { - if (Cond.empty()) +bool HexagonInstrInfo::isTC2(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123: + case Hexagon::Sched::ALU64_tc_2_SLOT23: + case Hexagon::Sched::CR_tc_2_SLOT3: + case Hexagon::Sched::M_tc_2_SLOT23: + case Hexagon::Sched::S_2op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_2_SLOT23: return true; - assert(Cond[0].isImm() && "First entry in the cond vector not imm-val"); - Opcode_t opcode = Cond[0].getImm(); - //unsigned temp; - assert(get(opcode).isBranch() && "Should be a branching condition."); - if (isEndLoopN(opcode)) + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC2Early(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_2early_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_2early_SLOT0123: + case Hexagon::Sched::ALU64_tc_2early_SLOT23: + case Hexagon::Sched::CR_tc_2early_SLOT23: + case Hexagon::Sched::CR_tc_2early_SLOT3: + case Hexagon::Sched::J_tc_2early_SLOT0123: + case Hexagon::Sched::J_tc_2early_SLOT2: + case Hexagon::Sched::J_tc_2early_SLOT23: + case Hexagon::Sched::S_2op_tc_2early_SLOT23: + case Hexagon::Sched::S_3op_tc_2early_SLOT23: return true; - Opcode_t NewOpcode = getInvertedPredicatedOpcode(opcode); - Cond[0].setImm(NewOpcode); - return false; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const { + if (!MI) + return false; + + unsigned SchedClass = MI->getDesc().getSchedClass(); + return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23; } -bool HexagonInstrInfo:: -isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs, - const BranchProbability &Probability) const { - return (NumInstrs <= 4); +bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const { + if (!MI) + return false; + + const uint64_t V = getType(MI); + return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST; } -bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: return false; - case Hexagon::L4_return: - case Hexagon::L4_return_t: - case Hexagon::L4_return_f: - case Hexagon::L4_return_tnew_pnt: - case Hexagon::L4_return_fnew_pnt: - case Hexagon::L4_return_tnew_pt: - case Hexagon::L4_return_fnew_pt: - return true; + +// Check if the Offset is a valid auto-inc imm by Load/Store Type. 
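// Illustrative sketch (not from the patch): each case of isValidAutoIncImm
// below applies one shared test, the offset must lie in a per-type range and
// be a multiple of the access size. A version of that check, with the bounds
// as hypothetical stand-ins for the Hexagon_MEM*_AUTOINC_{MIN,MAX} constants:

#include <cassert>

inline bool validAutoInc(int Offset, int Min, int Max, unsigned AlignBytes) {
  // AlignBytes is a power of two, so the low-bit mask tests alignment.
  return Offset >= Min && Offset <= Max && (Offset & (AlignBytes - 1)) == 0;
}

int main() {
  assert(validAutoInc(8, -64, 56, 8));   // doubleword-style auto-increment
  assert(!validAutoInc(6, -64, 56, 8));  // misaligned for an i64 access
  assert(!validAutoInc(64, -64, 56, 8)); // aligned but past the range
}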
+// +bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { + if (VT == MVT::v16i32 || VT == MVT::v8i64 || + VT == MVT::v32i16 || VT == MVT::v64i8) { + return (Offset >= Hexagon_MEMV_AUTOINC_MIN && + Offset <= Hexagon_MEMV_AUTOINC_MAX && + (Offset & 0x3f) == 0); + } + // 128B + if (VT == MVT::v32i32 || VT == MVT::v16i64 || + VT == MVT::v64i16 || VT == MVT::v128i8) { + return (Offset >= Hexagon_MEMV_AUTOINC_MIN_128B && + Offset <= Hexagon_MEMV_AUTOINC_MAX_128B && + (Offset & 0x7f) == 0); + } + if (VT == MVT::i64) { + return (Offset >= Hexagon_MEMD_AUTOINC_MIN && + Offset <= Hexagon_MEMD_AUTOINC_MAX && + (Offset & 0x7) == 0); + } + if (VT == MVT::i32) { + return (Offset >= Hexagon_MEMW_AUTOINC_MIN && + Offset <= Hexagon_MEMW_AUTOINC_MAX && + (Offset & 0x3) == 0); + } + if (VT == MVT::i16) { + return (Offset >= Hexagon_MEMH_AUTOINC_MIN && + Offset <= Hexagon_MEMH_AUTOINC_MAX && + (Offset & 0x1) == 0); + } + if (VT == MVT::i8) { + return (Offset >= Hexagon_MEMB_AUTOINC_MIN && + Offset <= Hexagon_MEMB_AUTOINC_MAX); } + llvm_unreachable("Not an auto-inc opc!"); } @@ -1222,6 +2329,40 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, // misaligns with respect to load size. switch (Opcode) { + case Hexagon::STriq_pred_V6: + case Hexagon::STriq_pred_vec_V6: + case Hexagon::STriv_pseudo_V6: + case Hexagon::STrivv_pseudo_V6: + case Hexagon::LDriq_pred_V6: + case Hexagon::LDriq_pred_vec_V6: + case Hexagon::LDriv_pseudo_V6: + case Hexagon::LDrivv_pseudo_V6: + case Hexagon::LDrivv_indexed: + case Hexagon::STrivv_indexed: + case Hexagon::V6_vL32b_ai: + case Hexagon::V6_vS32b_ai: + case Hexagon::V6_vL32Ub_ai: + case Hexagon::V6_vS32Ub_ai: + return (Offset >= Hexagon_MEMV_OFFSET_MIN) && + (Offset <= Hexagon_MEMV_OFFSET_MAX); + + case Hexagon::STriq_pred_V6_128B: + case Hexagon::STriq_pred_vec_V6_128B: + case Hexagon::STriv_pseudo_V6_128B: + case Hexagon::STrivv_pseudo_V6_128B: + case Hexagon::LDriq_pred_V6_128B: + case Hexagon::LDriq_pred_vec_V6_128B: + case Hexagon::LDriv_pseudo_V6_128B: + case Hexagon::LDrivv_pseudo_V6_128B: + case Hexagon::LDrivv_indexed_128B: + case Hexagon::STrivv_indexed_128B: + case Hexagon::V6_vL32b_ai_128B: + case Hexagon::V6_vS32b_ai_128B: + case Hexagon::V6_vL32Ub_ai_128B: + case Hexagon::V6_vS32Ub_ai_128B: + return (Offset >= Hexagon_MEMV_OFFSET_MIN_128B) && + (Offset <= Hexagon_MEMV_OFFSET_MAX_128B); + case Hexagon::J2_loop0i: case Hexagon::J2_loop1i: return isUInt<10>(Offset); @@ -1248,8 +2389,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, (Offset <= Hexagon_MEMH_OFFSET_MAX); case Hexagon::L2_loadrb_io: - case Hexagon::S2_storerb_io: case Hexagon::L2_loadrub_io: + case Hexagon::S2_storerb_io: return (Offset >= Hexagon_MEMB_OFFSET_MIN) && (Offset <= Hexagon_MEMB_OFFSET_MAX); @@ -1257,28 +2398,28 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, return (Offset >= Hexagon_ADDI_OFFSET_MIN) && (Offset <= Hexagon_ADDI_OFFSET_MAX); - case Hexagon::L4_iadd_memopw_io: - case Hexagon::L4_isub_memopw_io: - case Hexagon::L4_add_memopw_io: - case Hexagon::L4_sub_memopw_io: - case Hexagon::L4_and_memopw_io: - case Hexagon::L4_or_memopw_io: + case Hexagon::L4_iadd_memopw_io : + case Hexagon::L4_isub_memopw_io : + case Hexagon::L4_add_memopw_io : + case Hexagon::L4_sub_memopw_io : + case Hexagon::L4_and_memopw_io : + case Hexagon::L4_or_memopw_io : return (0 <= Offset && Offset <= 255); - case Hexagon::L4_iadd_memoph_io: - case Hexagon::L4_isub_memoph_io: - case Hexagon::L4_add_memoph_io: - case 
Hexagon::L4_sub_memoph_io: - case Hexagon::L4_and_memoph_io: - case Hexagon::L4_or_memoph_io: + case Hexagon::L4_iadd_memoph_io : + case Hexagon::L4_isub_memoph_io : + case Hexagon::L4_add_memoph_io : + case Hexagon::L4_sub_memoph_io : + case Hexagon::L4_and_memoph_io : + case Hexagon::L4_or_memoph_io : return (0 <= Offset && Offset <= 127); - case Hexagon::L4_iadd_memopb_io: - case Hexagon::L4_isub_memopb_io: - case Hexagon::L4_add_memopb_io: - case Hexagon::L4_sub_memopb_io: - case Hexagon::L4_and_memopb_io: - case Hexagon::L4_or_memopb_io: + case Hexagon::L4_iadd_memopb_io : + case Hexagon::L4_isub_memopb_io : + case Hexagon::L4_add_memopb_io : + case Hexagon::L4_sub_memopb_io : + case Hexagon::L4_and_memopb_io : + case Hexagon::L4_or_memopb_io : return (0 <= Offset && Offset <= 63); // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of @@ -1291,223 +2432,556 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::TFR_FIA: case Hexagon::INLINEASM: return true; - } + + case Hexagon::L2_ploadrbt_io: + case Hexagon::L2_ploadrbf_io: + case Hexagon::L2_ploadrubt_io: + case Hexagon::L2_ploadrubf_io: + case Hexagon::S2_pstorerbt_io: + case Hexagon::S2_pstorerbf_io: + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + return isUInt<6>(Offset); + + case Hexagon::L2_ploadrht_io: + case Hexagon::L2_ploadrhf_io: + case Hexagon::L2_ploadruht_io: + case Hexagon::L2_ploadruhf_io: + case Hexagon::S2_pstorerht_io: + case Hexagon::S2_pstorerhf_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + return isShiftedUInt<6,1>(Offset); + + case Hexagon::L2_ploadrit_io: + case Hexagon::L2_ploadrif_io: + case Hexagon::S2_pstorerit_io: + case Hexagon::S2_pstorerif_io: + case Hexagon::S4_storeiri_io: + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + return isShiftedUInt<6,2>(Offset); + + case Hexagon::L2_ploadrdt_io: + case Hexagon::L2_ploadrdf_io: + case Hexagon::S2_pstorerdt_io: + case Hexagon::S2_pstorerdf_io: + return isShiftedUInt<6,3>(Offset); + } // switch llvm_unreachable("No offset range is defined for this opcode. " "Please define it in the above switch statement!"); } -// -// Check if the Offset is a valid auto-inc imm by Load/Store Type. 
-// -bool HexagonInstrInfo:: -isValidAutoIncImm(const EVT VT, const int Offset) const { +bool HexagonInstrInfo::isVecAcc(const MachineInstr *MI) const { + return MI && isV60VectorInstruction(MI) && isAccumulator(MI); +} - if (VT == MVT::i64) { - return (Offset >= Hexagon_MEMD_AUTOINC_MIN && - Offset <= Hexagon_MEMD_AUTOINC_MAX && - (Offset & 0x7) == 0); - } - if (VT == MVT::i32) { - return (Offset >= Hexagon_MEMW_AUTOINC_MIN && - Offset <= Hexagon_MEMW_AUTOINC_MAX && - (Offset & 0x3) == 0); - } - if (VT == MVT::i16) { - return (Offset >= Hexagon_MEMH_AUTOINC_MIN && - Offset <= Hexagon_MEMH_AUTOINC_MAX && - (Offset & 0x1) == 0); - } - if (VT == MVT::i8) { - return (Offset >= Hexagon_MEMB_AUTOINC_MIN && - Offset <= Hexagon_MEMB_AUTOINC_MAX); + +bool HexagonInstrInfo::isVecALU(const MachineInstr *MI) const { + if (!MI) + return false; + const uint64_t F = get(MI->getOpcode()).TSFlags; + const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask); + return + V == HexagonII::TypeCVI_VA || + V == HexagonII::TypeCVI_VA_DV; +} + + +bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI)) + return true; + + if (EnableALUForwarding && (isVecALU(ConsMI) || isLateSourceInstr(ConsMI))) + return true; + + if (mayBeNewStore(ConsMI)) + return true; + + return false; +} + + +/// \brief Can these instructions execute at the same time in a bundle. +bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First, + const MachineInstr *Second) const { + if (DisableNVSchedule) + return false; + if (mayBeNewStore(Second)) { + // Make sure the definition of the first instruction is the value being + // stored. + const MachineOperand &Stored = + Second->getOperand(Second->getNumOperands() - 1); + if (!Stored.isReg()) + return false; + for (unsigned i = 0, e = First->getNumOperands(); i < e; ++i) { + const MachineOperand &Op = First->getOperand(i); + if (Op.isReg() && Op.isDef() && Op.getReg() == Stored.getReg()) + return true; + } } - llvm_unreachable("Not an auto-inc opc!"); + return false; +} + + +bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { + for (auto &I : *B) + if (I.isEHLabel()) + return true; + return false; } -bool HexagonInstrInfo:: -isMemOp(const MachineInstr *MI) const { -// return MI->getDesc().mayLoad() && MI->getDesc().mayStore(); - - switch (MI->getOpcode()) - { - default: return false; - case Hexagon::L4_iadd_memopw_io: - case Hexagon::L4_isub_memopw_io: - case Hexagon::L4_add_memopw_io: - case Hexagon::L4_sub_memopw_io: - case Hexagon::L4_and_memopw_io: - case Hexagon::L4_or_memopw_io: - case Hexagon::L4_iadd_memoph_io: - case Hexagon::L4_isub_memoph_io: - case Hexagon::L4_add_memoph_io: - case Hexagon::L4_sub_memoph_io: - case Hexagon::L4_and_memoph_io: - case Hexagon::L4_or_memoph_io: - case Hexagon::L4_iadd_memopb_io: - case Hexagon::L4_isub_memopb_io: - case Hexagon::L4_add_memopb_io: - case Hexagon::L4_sub_memopb_io: - case Hexagon::L4_and_memopb_io: - case Hexagon::L4_or_memopb_io: - case Hexagon::L4_ior_memopb_io: - case Hexagon::L4_ior_memoph_io: - case Hexagon::L4_ior_memopw_io: - case Hexagon::L4_iand_memopb_io: - case Hexagon::L4_iand_memoph_io: - case Hexagon::L4_iand_memopw_io: +// Returns true if an instruction can be converted into a non-extended +// equivalent instruction. 
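// Illustrative sketch (not from the patch): canExecuteInBundle above pairs a
// producer with a new-value store by scanning the producer's definitions for
// the register being stored. The same scan over a simplified operand list
// (the Op struct is a stand-in, not the LLVM MachineOperand class):

#include <cassert>
#include <vector>

struct Op { unsigned Reg; bool IsDef; };

inline bool defFeedsStoredValue(const std::vector<Op> &ProducerOps,
                                unsigned StoredReg) {
  for (const Op &O : ProducerOps)
    if (O.IsDef && O.Reg == StoredReg)
      return true;
  return false;
}

int main() {
  std::vector<Op> Producer = {{/*Reg=*/2, /*IsDef=*/true}, {3, false}};
  assert(defFeedsStoredValue(Producer, 2));  // r2 could be stored as .new
  assert(!defFeedsStoredValue(Producer, 3)); // r3 is only a use
}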
+bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr *MI) const { + short NonExtOpcode; + // Check if the instruction has a register form that uses register in place + // of the extended operand, if so return that as the non-extended form. + if (Hexagon::getRegForm(MI->getOpcode()) >= 0) + return true; + + if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) { + // Check addressing mode and retrieve non-ext equivalent instruction. + + switch (getAddrMode(MI)) { + case HexagonII::Absolute : + // Load/store with absolute addressing mode can be converted into + // base+offset mode. + NonExtOpcode = Hexagon::getBaseWithImmOffset(MI->getOpcode()); + break; + case HexagonII::BaseImmOffset : + // Load/store with base+offset addressing mode can be converted into + // base+register offset addressing mode. However left shift operand should + // be set to 0. + NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode()); + break; + case HexagonII::BaseLongOffset: + NonExtOpcode = Hexagon::getRegShlForm(MI->getOpcode()); + break; + default: + return false; + } + if (NonExtOpcode < 0) + return false; return true; } return false; } -bool HexagonInstrInfo:: -isSpillPredRegOp(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: return false; - case Hexagon::STriw_pred : - case Hexagon::LDriw_pred : +bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), + Hexagon::InstrType_Pseudo) >= 0; +} + + +bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) + const { + MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); + while (I != E) { + if (I->isBarrier()) return true; + ++I; } + return false; } -bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: return false; - case Hexagon::C2_cmpeq: - case Hexagon::C2_cmpeqi: - case Hexagon::C2_cmpgt: - case Hexagon::C2_cmpgti: - case Hexagon::C2_cmpgtu: - case Hexagon::C2_cmpgtui: + +// Returns true, if a LD insn can be promoted to a cur load. +bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr *MI) const { + auto &HST = MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>(); + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) && + HST.hasV60TOps(); +} + + +// Returns true, if a ST insn can be promoted to a new-value store. +bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask; +} + + +bool HexagonInstrInfo::producesStall(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + // There is no stall when ProdMI is not a V60 vector. + if (!isV60VectorInstruction(ProdMI)) + return false; + + // There is no stall when ProdMI and ConsMI are not dependent. + if (!isDependent(ProdMI, ConsMI)) + return false; + + // When Forward Scheduling is enabled, there is no stall if ProdMI and ConsMI + // are scheduled in consecutive packets. + if (isVecUsableNextPacket(ProdMI, ConsMI)) + return false; + + return true; +} + + +bool HexagonInstrInfo::producesStall(const MachineInstr *MI, + MachineBasicBlock::const_instr_iterator BII) const { + // There is no stall when I is not a V60 vector. 
+ if (!isV60VectorInstruction(MI)) + return false; + + MachineBasicBlock::const_instr_iterator MII = BII; + MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end(); + + if (!(*MII).isBundle()) { + const MachineInstr *J = &*MII; + if (!isV60VectorInstruction(J)) + return false; + else if (isVecUsableNextPacket(J, MI)) + return false; + return true; + } + + for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) { + const MachineInstr *J = &*MII; + if (producesStall(J, MI)) return true; } + return false; +} + + +bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr *MI, + unsigned PredReg) const { + for (unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg)) + return false; // Predicate register must be explicitly defined. + } + + // Hexagon Programmer's Reference says that decbin, memw_locked, and + // memd_locked cannot be used as .new as well, + // but we don't seem to have these instructions defined. + return MI->getOpcode() != Hexagon::A4_tlbmatch; +} + + +bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { + return (Opcode == Hexagon::J2_jumpt) || + (Opcode == Hexagon::J2_jumpf) || + (Opcode == Hexagon::J2_jumptnew) || + (Opcode == Hexagon::J2_jumpfnew) || + (Opcode == Hexagon::J2_jumptnewpt) || + (Opcode == Hexagon::J2_jumpfnewpt); +} + + +bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const { + if (Cond.empty() || !isPredicated(Cond[0].getImm())) + return false; + return !isPredicatedTrue(Cond[0].getImm()); +} + + +unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; +} + + +// Returns the base register in a memory access (load/store). The offset is +// returned in Offset and the access size is returned in AccessSize. +unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr *MI, + int &Offset, unsigned &AccessSize) const { + // Return if it is not a base+offset type instruction or a MemOp. + if (getAddrMode(MI) != HexagonII::BaseImmOffset && + getAddrMode(MI) != HexagonII::BaseLongOffset && + !isMemOp(MI) && !isPostIncrement(MI)) + return 0; + + // Since it is a memory access instruction, getMemAccessSize() should never + // return 0. + assert (getMemAccessSize(MI) && + "BaseImmOffset or BaseLongOffset or MemOp without accessSize"); + + // Return Values of getMemAccessSize() are + // 0 - Checked in the assert above. + // 1, 2, 3, 4 & 7, 8 - The statement below is correct for all these. + // MemAccessSize is represented as 1+log2(N) where N is size in bits. + AccessSize = (1U << (getMemAccessSize(MI) - 1)); + + unsigned basePos = 0, offsetPos = 0; + if (!getBaseAndOffsetPosition(MI, basePos, offsetPos)) + return 0; + + // Post increment updates its EA after the mem access, + // so we need to treat its offset as zero. + if (isPostIncrement(MI)) + Offset = 0; + else { + Offset = MI->getOperand(offsetPos).getImm(); + } + + return MI->getOperand(basePos).getReg(); +} + + +/// Return the position of the base and offset operands for this instruction. +bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI, + unsigned &BasePos, unsigned &OffsetPos) const { + // Deal with memops first. 
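// Illustrative sketch (not from the patch): getBaseAndOffset above decodes
// the access size from a 1 + log2(size) encoding with a single shift. The
// decode in isolation:

#include <cassert>

inline unsigned decodeAccessSize(unsigned Encoded) {
  assert(Encoded != 0 && "memory accesses always carry a size");
  return 1u << (Encoded - 1); // 1 -> 1, 2 -> 2, 3 -> 4, 4 -> 8, ...
}

int main() {
  assert(decodeAccessSize(1) == 1);
  assert(decodeAccessSize(4) == 8); // doubleword access
}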
+ if (isMemOp(MI)) { + assert (MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && + "Bad Memop."); + BasePos = 0; + OffsetPos = 1; + } else if (MI->mayStore()) { + BasePos = 0; + OffsetPos = 1; + } else if (MI->mayLoad()) { + BasePos = 1; + OffsetPos = 2; + } else + return false; + + if (isPredicated(MI)) { + BasePos++; + OffsetPos++; + } + if (isPostIncrement(MI)) { + BasePos++; + OffsetPos++; + } + + if (!MI->getOperand(BasePos).isReg() || !MI->getOperand(OffsetPos).isImm()) + return false; + + return true; +} + + +// Inserts branching instructions in reverse order of their occurrence. +// e.g. jump_t t1 (i1) +// jump t2 (i2) +// Jumpers = {i2, i1} +SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs( + MachineBasicBlock& MBB) const { + SmallVector<MachineInstr*, 2> Jumpers; + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::instr_iterator I = MBB.instr_end(); + if (I == MBB.instr_begin()) + return Jumpers; + + // A basic block may look like this: + // + // [ insn + // EH_LABEL + // insn + // insn + // insn + // EH_LABEL + // insn ] + // + // It has two succs but does not have a terminator. + // Don't know how to handle it. + do { + --I; + if (I->isEHLabel()) + return Jumpers; + } while (I != MBB.instr_begin()); + + I = MBB.instr_end(); + --I; + + while (I->isDebugValue()) { + if (I == MBB.instr_begin()) + return Jumpers; + --I; + } + if (!isUnpredicatedTerminator(&*I)) + return Jumpers; + + // Get the last instruction in the block. + MachineInstr *LastInst = &*I; + Jumpers.push_back(LastInst); + MachineInstr *SecondLastInst = nullptr; + // Find one more terminator if present. + do { + if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) { + if (!SecondLastInst) { + SecondLastInst = &*I; + Jumpers.push_back(SecondLastInst); + } else // This is a third branch. + return Jumpers; + } + if (I == MBB.instr_begin()) + break; + --I; + } while (true); + return Jumpers; } -bool HexagonInstrInfo:: -isConditionalTransfer (const MachineInstr *MI) const { + +// Returns Operand Index for the constant extended instruction. +unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask; +} + +// See if instruction could potentially be a duplex candidate. +// If so, return its group. Zero otherwise. +HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( + const MachineInstr *MI) const { + unsigned DstReg, SrcReg, Src1Reg, Src2Reg; + switch (MI->getOpcode()) { - default: return false; - case Hexagon::A2_tfrt: - case Hexagon::A2_tfrf: - case Hexagon::C2_cmoveit: - case Hexagon::C2_cmoveif: - case Hexagon::A2_tfrtnew: - case Hexagon::A2_tfrfnew: - case Hexagon::C2_cmovenewit: - case Hexagon::C2_cmovenewif: - return true; + default: + return HexagonII::HCG_None; + // + // Compound pairs.
+ // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2" + // "Rd16=#U6 ; jump #r9:2" + // "Rd16=Rs16 ; jump #r9:2" + // + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtu: + DstReg = MI->getOperand(0).getReg(); + Src1Reg = MI->getOperand(1).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg)) + return HexagonII::HCG_A; + break; + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + // P0 = cmp.eq(Rs,#u2) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() && + ((isUInt<5>(MI->getOperand(2).getImm())) || + (MI->getOperand(2).getImm() == -1))) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfr: + // Rd = Rs + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfrsi: + // Rd = #u6 + // Do not test for #u6 size since the const is getting extended + // regardless and compound could be formed. + DstReg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(DstReg)) + return HexagonII::HCG_A; + break; + case Hexagon::S2_tstbit_i: + DstReg = MI->getOperand(0).getReg(); + Src1Reg = MI->getOperand(1).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + MI->getOperand(2).isImm() && + isIntRegForSubInst(Src1Reg) && (MI->getOperand(2).getImm() == 0)) + return HexagonII::HCG_A; + break; + // The fact that .new form is used pretty much guarantees + // that predicate register will match. Nevertheless, + // there could be some false positives without additional + // checking. + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumptnewpt: + case Hexagon::J2_jumpfnewpt: + Src1Reg = MI->getOperand(0).getReg(); + if (Hexagon::PredRegsRegClass.contains(Src1Reg) && + (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg)) + return HexagonII::HCG_B; + break; + // Transfer and jump: + // Rd=#U6 ; jump #r9:2 + // Rd=Rs ; jump #r9:2 + // Do not test for jump range here. + case Hexagon::J2_jump: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + return HexagonII::HCG_C; + break; } + + return HexagonII::HCG_None; +} + + +// Returns -1 when there is no opcode found. 
+unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr *GA, + const MachineInstr *GB) const { + assert(getCompoundCandidateGroup(GA) == HexagonII::HCG_A); + assert(getCompoundCandidateGroup(GB) == HexagonII::HCG_B); + if ((GA->getOpcode() != Hexagon::C2_cmpeqi) || + (GB->getOpcode() != Hexagon::J2_jumptnew)) + return -1; + unsigned DestReg = GA->getOperand(0).getReg(); + if (!GB->readsRegister(DestReg)) + return -1; + if (DestReg == Hexagon::P0) + return Hexagon::J4_cmpeqi_tp0_jump_nt; + if (DestReg == Hexagon::P1) + return Hexagon::J4_cmpeqi_tp1_jump_nt; + return -1; } -bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { - switch (MI->getOpcode()) - { - default: return false; - case Hexagon::A2_paddf: - case Hexagon::A2_paddfnew: - case Hexagon::A2_paddt: - case Hexagon::A2_paddtnew: - case Hexagon::A2_pandf: - case Hexagon::A2_pandfnew: - case Hexagon::A2_pandt: - case Hexagon::A2_pandtnew: - case Hexagon::A4_paslhf: - case Hexagon::A4_paslhfnew: - case Hexagon::A4_paslht: - case Hexagon::A4_paslhtnew: - case Hexagon::A4_pasrhf: - case Hexagon::A4_pasrhfnew: - case Hexagon::A4_pasrht: - case Hexagon::A4_pasrhtnew: - case Hexagon::A2_porf: - case Hexagon::A2_porfnew: - case Hexagon::A2_port: - case Hexagon::A2_portnew: - case Hexagon::A2_psubf: - case Hexagon::A2_psubfnew: - case Hexagon::A2_psubt: - case Hexagon::A2_psubtnew: - case Hexagon::A2_pxorf: - case Hexagon::A2_pxorfnew: - case Hexagon::A2_pxort: - case Hexagon::A2_pxortnew: - case Hexagon::A4_psxthf: - case Hexagon::A4_psxthfnew: - case Hexagon::A4_psxtht: - case Hexagon::A4_psxthtnew: - case Hexagon::A4_psxtbf: - case Hexagon::A4_psxtbfnew: - case Hexagon::A4_psxtbt: - case Hexagon::A4_psxtbtnew: - case Hexagon::A4_pzxtbf: - case Hexagon::A4_pzxtbfnew: - case Hexagon::A4_pzxtbt: - case Hexagon::A4_pzxtbtnew: - case Hexagon::A4_pzxthf: - case Hexagon::A4_pzxthfnew: - case Hexagon::A4_pzxtht: - case Hexagon::A4_pzxthtnew: - case Hexagon::A2_paddit: - case Hexagon::A2_paddif: - case Hexagon::C2_ccombinewt: - case Hexagon::C2_ccombinewf: - return true; + +int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { + enum Hexagon::PredSense inPredSense; + inPredSense = invertPredicate ? Hexagon::PredSense_false : + Hexagon::PredSense_true; + int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense); + if (CondOpcode >= 0) // Valid Conditional opcode/instruction + return CondOpcode; + + // This switch case will be removed once all the instructions have been + // modified to use relation maps. + switch(Opc) { + case Hexagon::TFRI_f: + return !invertPredicate ? 
Hexagon::TFRI_cPt_f : + Hexagon::TFRI_cNotPt_f; } + + llvm_unreachable("Unexpected predicable instruction"); } -bool HexagonInstrInfo:: -isConditionalLoad (const MachineInstr* MI) const { - switch (MI->getOpcode()) - { - default: return false; - case Hexagon::L2_ploadrdt_io : - case Hexagon::L2_ploadrdf_io: - case Hexagon::L2_ploadrit_io: - case Hexagon::L2_ploadrif_io: - case Hexagon::L2_ploadrht_io: - case Hexagon::L2_ploadrhf_io: - case Hexagon::L2_ploadrbt_io: - case Hexagon::L2_ploadrbf_io: - case Hexagon::L2_ploadruht_io: - case Hexagon::L2_ploadruhf_io: - case Hexagon::L2_ploadrubt_io: - case Hexagon::L2_ploadrubf_io: - case Hexagon::L2_ploadrdt_pi: - case Hexagon::L2_ploadrdf_pi: - case Hexagon::L2_ploadrit_pi: - case Hexagon::L2_ploadrif_pi: - case Hexagon::L2_ploadrht_pi: - case Hexagon::L2_ploadrhf_pi: - case Hexagon::L2_ploadrbt_pi: - case Hexagon::L2_ploadrbf_pi: - case Hexagon::L2_ploadruht_pi: - case Hexagon::L2_ploadruhf_pi: - case Hexagon::L2_ploadrubt_pi: - case Hexagon::L2_ploadrubf_pi: - case Hexagon::L4_ploadrdt_rr: - case Hexagon::L4_ploadrdf_rr: - case Hexagon::L4_ploadrbt_rr: - case Hexagon::L4_ploadrbf_rr: - case Hexagon::L4_ploadrubt_rr: - case Hexagon::L4_ploadrubf_rr: - case Hexagon::L4_ploadrht_rr: - case Hexagon::L4_ploadrhf_rr: - case Hexagon::L4_ploadruht_rr: - case Hexagon::L4_ploadruhf_rr: - case Hexagon::L4_ploadrit_rr: - case Hexagon::L4_ploadrif_rr: - return true; + +// Return the cur value instruction for a given store. +int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown .cur type"); + case Hexagon::V6_vL32b_pi: + return Hexagon::V6_vL32b_cur_pi; + case Hexagon::V6_vL32b_ai: + return Hexagon::V6_vL32b_cur_ai; + //128B + case Hexagon::V6_vL32b_pi_128B: + return Hexagon::V6_vL32b_cur_pi_128B; + case Hexagon::V6_vL32b_ai_128B: + return Hexagon::V6_vL32b_cur_ai_128B; } + return 0; } -// Returns true if an instruction is a conditional store. -// -// Note: It doesn't include conditional new-value stores as they can't be -// converted to .new predicate. + + +// The diagram below shows the steps involved in the conversion of a predicated +// store instruction to its .new predicated new-value form. // // p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ] // ^ ^ @@ -1524,8 +2998,6 @@ isConditionalLoad (const MachineInstr* MI) const { // p.old store // [if (p0)memw(R0+#0)=R2] // -// The above diagram shows the steps involoved in the conversion of a predicated -// store instruction to its .new predicated new-value form. // // The following set of instructions further explains the scenario where // conditional new-value store becomes invalid when promoted to .new predicate @@ -1538,105 +3010,33 @@ isConditionalLoad (const MachineInstr* MI) const { // the first two instructions because in instr 1, r0 is conditional on old value // of p0 but its use in instr 3 is conditional on p0 modified by instr 2 which // is not valid for new-value stores. 
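// Illustrative sketch (not from the patch): getCondOpcode above first
// consults a TableGen-generated relation map (Hexagon::getPredOpcode) and
// only falls back to a hand-written switch when no entry exists. A
// table-driven lookup of the same shape, with a hypothetical two-entry map
// and made-up opcode numbers:

#include <cassert>

struct PredMapEntry { int Opc, TrueOpc, FalseOpc; };

// Hypothetical stand-in for the generated relation map.
static const PredMapEntry Map[] = {
  {/*base*/ 100, /*if (p)*/ 101, /*if (!p)*/ 102},
  {/*base*/ 200, /*if (p)*/ 201, /*if (!p)*/ 202},
};

inline int getPredOpcodeSketch(int Opc, bool SenseTrue) {
  for (const PredMapEntry &E : Map)
    if (E.Opc == Opc)
      return SenseTrue ? E.TrueOpc : E.FalseOpc;
  return -1; // no entry: caller falls back to the manual switch
}

int main() {
  assert(getPredOpcodeSketch(100, true) == 101);
  assert(getPredOpcodeSketch(200, false) == 202);
  assert(getPredOpcodeSketch(300, true) == -1);
}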
-bool HexagonInstrInfo:: -isConditionalStore (const MachineInstr* MI) const { - switch (MI->getOpcode()) - { - default: return false; - case Hexagon::S4_storeirbt_io: - case Hexagon::S4_storeirbf_io: - case Hexagon::S4_pstorerbt_rr: - case Hexagon::S4_pstorerbf_rr: - case Hexagon::S2_pstorerbt_io: - case Hexagon::S2_pstorerbf_io: - case Hexagon::S2_pstorerbt_pi: - case Hexagon::S2_pstorerbf_pi: - case Hexagon::S2_pstorerdt_io: - case Hexagon::S2_pstorerdf_io: - case Hexagon::S4_pstorerdt_rr: - case Hexagon::S4_pstorerdf_rr: - case Hexagon::S2_pstorerdt_pi: - case Hexagon::S2_pstorerdf_pi: - case Hexagon::S2_pstorerht_io: - case Hexagon::S2_pstorerhf_io: - case Hexagon::S4_storeirht_io: - case Hexagon::S4_storeirhf_io: - case Hexagon::S4_pstorerht_rr: - case Hexagon::S4_pstorerhf_rr: - case Hexagon::S2_pstorerht_pi: - case Hexagon::S2_pstorerhf_pi: - case Hexagon::S2_pstorerit_io: - case Hexagon::S2_pstorerif_io: - case Hexagon::S4_storeirit_io: - case Hexagon::S4_storeirif_io: - case Hexagon::S4_pstorerit_rr: - case Hexagon::S4_pstorerif_rr: - case Hexagon::S2_pstorerit_pi: - case Hexagon::S2_pstorerif_pi: - - // V4 global address store before promoting to dot new. - case Hexagon::S4_pstorerdt_abs: - case Hexagon::S4_pstorerdf_abs: - case Hexagon::S4_pstorerbt_abs: - case Hexagon::S4_pstorerbf_abs: - case Hexagon::S4_pstorerht_abs: - case Hexagon::S4_pstorerhf_abs: - case Hexagon::S4_pstorerit_abs: - case Hexagon::S4_pstorerif_abs: - return true; - - // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded - // from the "Conditional Store" list. Because a predicated new value store - // would NOT be promoted to a double dot new store. See diagram below: - // This function returns yes for those stores that are predicated but not - // yet promoted to predicate dot new instructions. - // - // +---------------------+ - // /-----| if (p0) memw(..)=r0 |---------\~ - // || +---------------------+ || - // promote || /\ /\ || promote - // || /||\ /||\ || - // \||/ demote || \||/ - // \/ || || \/ - // +-------------------------+ || +-------------------------+ - // | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new | - // +-------------------------+ || +-------------------------+ - // || || || - // || demote \||/ - // promote || \/ NOT possible - // || || /\~ - // \||/ || /||\~ - // \/ || || - // +-----------------------------+ - // | if (p0.new) memw(..)=r0.new | - // +-----------------------------+ - // Double Dot New Store - // - } -} - - -bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { - if (isNewValue(MI) && isBranch(MI)) - return true; - return false; -} - -bool HexagonInstrInfo::isNewValueJump(Opcode_t Opcode) const { - return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); -} - -bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const { - return (getAddrMode(MI) == HexagonII::PostInc); -} - -// Returns true, if any one of the operands is a dot new -// insn, whether it is predicated dot new or register dot new. -bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const { - return (isNewValueInst(MI) || - (isPredicated(MI) && isPredicatedNew(MI))); -} - +// Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded +// from the "Conditional Store" list. Because a predicated new value store +// would NOT be promoted to a double dot new store. See diagram below: +// This function returns yes for those stores that are predicated but not +// yet promoted to predicate dot new instructions. 
+// +// +---------------------+ +// /-----| if (p0) memw(..)=r0 |---------\~ +// || +---------------------+ || +// promote || /\ /\ || promote +// || /||\ /||\ || +// \||/ demote || \||/ +// \/ || || \/ +// +-------------------------+ || +-------------------------+ +// | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new | +// +-------------------------+ || +-------------------------+ +// || || || +// || demote \||/ +// promote || \/ NOT possible +// || || /\~ +// \||/ || /||\~ +// \/ || || +// +-----------------------------+ +// | if (p0.new) memw(..)=r0.new | +// +-----------------------------+ +// Double Dot New Store +// // Returns the most basic instruction for the .new predicated instructions and // new-value stores. // For example, all of the following instructions will be converted back to the @@ -1645,24 +3045,23 @@ bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const { // 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1 // 3) if (p0.new) memw(R0+#0) = R1 ---> // +// To understand the translation of instruction 1 to its original form, consider +// a packet with 3 instructions. +// { p0 = cmp.eq(R0,R1) +// if (p0.new) R2 = add(R3, R4) +// R5 = add (R3, R1) +// } +// if (p0) memw(R5+#0) = R2 <--- trying to include it in the previous packet +// +// This instruction can be part of the previous packet only if both p0 and R2 +// are promoted to .new values. This promotion happens in steps, first +// predicate register is promoted to .new and in the next iteration R2 is +// promoted. Therefore, in case of dependence check failure (due to R5) during +// next iteration, it should be converted back to its most basic form. -int HexagonInstrInfo::GetDotOldOp(const int opc) const { - int NewOp = opc; - if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form - NewOp = Hexagon::getPredOldOpcode(NewOp); - assert(NewOp >= 0 && - "Couldn't change predicate new instruction to its old form."); - } - - if (isNewValueStore(NewOp)) { // Convert into non-new-value format - NewOp = Hexagon::getNonNVStore(NewOp); - assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); - } - return NewOp; -} // Return the new value instruction for a given store. -int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { +int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const { int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode()); if (NVOpcode >= 0) // Valid new-value store instruction. return NVOpcode; @@ -1672,12 +3071,6 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { case Hexagon::S4_storerb_ur: return Hexagon::S4_storerbnew_ur; - case Hexagon::S4_storerh_ur: - return Hexagon::S4_storerhnew_ur; - - case Hexagon::S4_storeri_ur: - return Hexagon::S4_storerinew_ur; - case Hexagon::S2_storerb_pci: return Hexagon::S2_storerb_pci; @@ -1692,203 +3085,496 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { case Hexagon::S2_storerf_pci: return Hexagon::S2_storerf_pci; + + case Hexagon::V6_vS32b_ai: + return Hexagon::V6_vS32b_new_ai; + + case Hexagon::V6_vS32b_pi: + return Hexagon::V6_vS32b_new_pi; + + // 128B + case Hexagon::V6_vS32b_ai_128B: + return Hexagon::V6_vS32b_new_ai_128B; + + case Hexagon::V6_vS32b_pi_128B: + return Hexagon::V6_vS32b_new_pi_128B; } return 0; } -// Return .new predicate version for an instruction. 
-int HexagonInstrInfo::GetDotNewPredOp(MachineInstr *MI, - const MachineBranchProbabilityInfo - *MBPI) const { +// Returns the opcode to use when converting MI, which is a conditional jump, +// into a conditional instruction which uses the .new value of the predicate. +// We also use branch probabilities to add a hint to the jump. +int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const { + // We assume that block can have at most two successors. + bool taken = false; + const MachineBasicBlock *Src = MI->getParent(); + const MachineOperand *BrTarget = &MI->getOperand(1); + const MachineBasicBlock *Dst = BrTarget->getMBB(); + const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst); + if (Prediction >= BranchProbability(1,2)) + taken = true; + + switch (MI->getOpcode()) { + case Hexagon::J2_jumpt: + return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew; + case Hexagon::J2_jumpf: + return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew; + + default: + llvm_unreachable("Unexpected jump instruction."); + } +} + + +// Return .new predicate version for an instruction. +int HexagonInstrInfo::getDotNewPredOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const { int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); if (NewOpcode >= 0) // Valid predicate new instruction return NewOpcode; switch (MI->getOpcode()) { - default: llvm_unreachable("Unknown .new type"); // Condtional Jumps case Hexagon::J2_jumpt: case Hexagon::J2_jumpf: return getDotNewPredJumpOp(MI, MBPI); - case Hexagon::J2_jumprt: - return Hexagon::J2_jumptnewpt; - - case Hexagon::J2_jumprf: - return Hexagon::J2_jumprfnewpt; - - case Hexagon::JMPrett: - return Hexagon::J2_jumprtnewpt; - - case Hexagon::JMPretf: - return Hexagon::J2_jumprfnewpt; - - - // Conditional combine - case Hexagon::C2_ccombinewt: - return Hexagon::C2_ccombinewnewt; - case Hexagon::C2_ccombinewf: - return Hexagon::C2_ccombinewnewf; + default: + assert(0 && "Unknown .new type"); } + return 0; } -unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; +int HexagonInstrInfo::getDotOldOp(const int opc) const { + int NewOp = opc; + if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form + NewOp = Hexagon::getPredOldOpcode(NewOp); + assert(NewOp >= 0 && + "Couldn't change predicate new instruction to its old form."); + } - return((F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask); + if (isNewValueStore(NewOp)) { // Convert into non-new-value format + NewOp = Hexagon::getNonNVStore(NewOp); + assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); + } + return NewOp; } -/// immediateExtend - Changes the instruction in place to one using an immediate -/// extender. -void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const { - assert((isExtendable(MI)||isConstExtended(MI)) && - "Instruction must be extendable"); - // Find which operand is extendable. - short ExtOpNum = getCExtOpNum(MI); - MachineOperand &MO = MI->getOperand(ExtOpNum); - // This needs to be something we understand. - assert((MO.isMBB() || MO.isImm()) && - "Branch with unknown extendable field type"); - // Mark given operand as extended. 
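// Illustrative sketch (not from the patch): getDotNewPredJumpOp above picks
// the :t (taken) or :nt hint by comparing the CFG edge probability with one
// half. The same decision using a plain numerator/denominator pair as a
// stand-in for llvm::BranchProbability:

#include <cassert>
#include <cstring>

struct Probability { unsigned N, D; };

inline bool predictedTaken(Probability P) { return 2u * P.N >= P.D; }

inline const char *newPredJumpFor(bool JumpT, Probability P) {
  if (JumpT)
    return predictedTaken(P) ? "J2_jumptnewpt" : "J2_jumptnew";
  return predictedTaken(P) ? "J2_jumpfnewpt" : "J2_jumpfnew";
}

int main() {
  assert(predictedTaken({3, 4}));
  assert(!predictedTaken({1, 4}));
  assert(std::strcmp(newPredJumpFor(true, {3, 4}), "J2_jumptnewpt") == 0);
}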
- MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); -} -DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( - const TargetSubtargetInfo &STI) const { - const InstrItineraryData *II = STI.getInstrItineraryData(); - return static_cast<const HexagonSubtarget &>(STI).createDFAPacketizer(II); -} - -bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const { - // Debug info is never a scheduling boundary. It's necessary to be explicit - // due to the special treatment of IT instructions below, otherwise a - // dbg_value followed by an IT will result in the IT instruction being - // considered a scheduling hazard, which is wrong. It should be the actual - // instruction preceding the dbg_value instruction(s), just like it is - // when debug info is not present. - if (MI->isDebugValue()) - return false; +// See if instruction could potentially be a duplex candidate. +// If so, return its group. Zero otherwise. +HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( + const MachineInstr *MI) const { + unsigned DstReg, SrcReg, Src1Reg, Src2Reg; + auto &HRI = getRegisterInfo(); - // Terminators and labels can't be scheduled around. - if (MI->getDesc().isTerminator() || MI->isPosition() || MI->isInlineAsm()) - return true; + switch (MI->getOpcode()) { + default: + return HexagonII::HSIG_None; + // + // Group L1: + // + // Rd = memw(Rs+#u4:2) + // Rd = memub(Rs+#u4:0) + case Hexagon::L2_loadri_io: + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + // Special case this one from Group L2. + // Rd = memw(r29+#u5:2) + if (isIntRegForSubInst(DstReg)) { + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && + MI->getOperand(2).isImm() && + isShiftedUInt<5,2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + // Rd = memw(Rs+#u4:2) + if (isIntRegForSubInst(SrcReg) && + (MI->getOperand(2).isImm() && + isShiftedUInt<4,2>(MI->getOperand(2).getImm()))) + return HexagonII::HSIG_L1; + } + break; + case Hexagon::L2_loadrub_io: + // Rd = memub(Rs+#u4:0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && isUInt<4>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L1; + break; + // + // Group L2: + // + // Rd = memh/memuh(Rs+#u3:1) + // Rd = memb(Rs+#u3:0) + // Rd = memw(r29+#u5:2) - Handled above. 
+ // Rdd = memd(r29+#u5:3) + // deallocframe + // [if ([!]p0[.new])] dealloc_return + // [if ([!]p0[.new])] jumpr r31 + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + // Rd = memh/memuh(Rs+#u3:1) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + isShiftedUInt<3,1>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + case Hexagon::L2_loadrb_io: + // Rd = memb(Rs+#u3:0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + isUInt<3>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + case Hexagon::L2_loadrd_io: + // Rdd = memd(r29+#u5:3) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && + Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && + MI->getOperand(2).isImm() && + isShiftedUInt<5,3>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + // dealloc_return is not documented in Hexagon Manual, but marked + // with A_SUBINSN attribute in iset_v4classic.py. + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + case Hexagon::L4_return: + case Hexagon::L2_deallocframe: + return HexagonII::HSIG_L2; + case Hexagon::EH_RETURN_JMPR: + case Hexagon::JMPret : + // jumpr r31 + // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>. + DstReg = MI->getOperand(0).getReg(); + if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg)) + return HexagonII::HSIG_L2; + break; + case Hexagon::JMPrett: + case Hexagon::JMPretf: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPretfnewpt : + case Hexagon::JMPrettnew : + case Hexagon::JMPretfnew : + DstReg = MI->getOperand(1).getReg(); + SrcReg = MI->getOperand(0).getReg(); + // [if ([!]p0[.new])] jumpr r31 + if ((Hexagon::PredRegsRegClass.contains(SrcReg) && + (Hexagon::P0 == SrcReg)) && + (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))) + return HexagonII::HSIG_L2; + break; + case Hexagon::L4_return_t : + case Hexagon::L4_return_f : + case Hexagon::L4_return_tnew_pnt : + case Hexagon::L4_return_fnew_pnt : + case Hexagon::L4_return_tnew_pt : + case Hexagon::L4_return_fnew_pt : + // [if ([!]p0[.new])] dealloc_return + SrcReg = MI->getOperand(0).getReg(); + if (Hexagon::PredRegsRegClass.contains(SrcReg) && (Hexagon::P0 == SrcReg)) + return HexagonII::HSIG_L2; + break; + // + // Group S1: + // + // memw(Rs+#u4:2) = Rt + // memb(Rs+#u4:0) = Rt + case Hexagon::S2_storeri_io: + // Special case this one from Group S2. 
+    // memw(r29+#u5:2) = Rt
+    Src1Reg = MI->getOperand(0).getReg();
+    Src2Reg = MI->getOperand(2).getReg();
+    if (Hexagon::IntRegsRegClass.contains(Src1Reg) &&
+        isIntRegForSubInst(Src2Reg) &&
+        HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() &&
+        isShiftedUInt<5,2>(MI->getOperand(1).getImm()))
+      return HexagonII::HSIG_S2;
+    // memw(Rs+#u4:2) = Rt
+    if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
+        MI->getOperand(1).isImm() &&
+        isShiftedUInt<4,2>(MI->getOperand(1).getImm()))
+      return HexagonII::HSIG_S1;
+    break;
+  case Hexagon::S2_storerb_io:
+    // memb(Rs+#u4:0) = Rt
+    Src1Reg = MI->getOperand(0).getReg();
+    Src2Reg = MI->getOperand(2).getReg();
+    if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
+        MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm()))
+      return HexagonII::HSIG_S1;
+    break;
+  //
+  // Group S2:
+  //
+  // memh(Rs+#u3:1) = Rt
+  // memw(r29+#u5:2) = Rt
+  // memd(r29+#s6:3) = Rtt
+  // memw(Rs+#u4:2) = #U1
+  // memb(Rs+#u4) = #U1
+  // allocframe(#u5:3)
+  case Hexagon::S2_storerh_io:
+    // memh(Rs+#u3:1) = Rt
+    Src1Reg = MI->getOperand(0).getReg();
+    Src2Reg = MI->getOperand(2).getReg();
+    if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
+        MI->getOperand(1).isImm() &&
+        isShiftedUInt<3,1>(MI->getOperand(1).getImm()))
+      return HexagonII::HSIG_S1;
+    break;
+  case Hexagon::S2_storerd_io:
+    // memd(r29+#s6:3) = Rtt
+    Src1Reg = MI->getOperand(0).getReg();
+    Src2Reg = MI->getOperand(2).getReg();
+    if (isDblRegForSubInst(Src2Reg, HRI) &&
+        Hexagon::IntRegsRegClass.contains(Src1Reg) &&
+        HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() &&
+        isShiftedInt<6,3>(MI->getOperand(1).getImm()))
+      return HexagonII::HSIG_S2;
+    break;
+  case Hexagon::S4_storeiri_io:
+    // memw(Rs+#u4:2) = #U1
+    Src1Reg = MI->getOperand(0).getReg();
+    if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() &&
+        isShiftedUInt<4,2>(MI->getOperand(1).getImm()) &&
+        MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm()))
+      return HexagonII::HSIG_S2;
+    break;
+  case Hexagon::S4_storeirb_io:
+    // memb(Rs+#u4) = #U1
+    Src1Reg = MI->getOperand(0).getReg();
+    if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() &&
+        isUInt<4>(MI->getOperand(1).getImm()) &&
+        MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm()))
+      return HexagonII::HSIG_S2;
+    break;
+  case Hexagon::S2_allocframe:
+    if (MI->getOperand(0).isImm() &&
+        isShiftedUInt<5,3>(MI->getOperand(0).getImm()))
+      return HexagonII::HSIG_S1;
+    break;
+  //
+  // Group A:
+  //
+  // Rx = add(Rx,#s7)
+  // Rd = Rs
+  // Rd = #u6
+  // Rd = #-1
+  // if ([!]P0[.new]) Rd = #0
+  // Rd = add(r29,#u6:2)
+  // Rx = add(Rx,Rs)
+  // P0 = cmp.eq(Rs,#u2)
+  // Rdd = combine(#0,Rs)
+  // Rdd = combine(Rs,#0)
+  // Rdd = combine(#u2,#U2)
+  // Rd = add(Rs,#1)
+  // Rd = add(Rs,#-1)
+  // Rd = sxth/sxtb/zxtb/zxth(Rs)
+  // Rd = and(Rs,#1)
+  case Hexagon::A2_addi:
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(1).getReg();
+    if (isIntRegForSubInst(DstReg)) {
+      // Rd = add(r29,#u6:2)
+      if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+          HRI.getStackRegister() == SrcReg && MI->getOperand(2).isImm() &&
+          isShiftedUInt<6,2>(MI->getOperand(2).getImm()))
+        return HexagonII::HSIG_A;
+      // Rx = add(Rx,#s7)
+      if ((DstReg == SrcReg) && MI->getOperand(2).isImm() &&
+          isInt<7>(MI->getOperand(2).getImm()))
+        return HexagonII::HSIG_A;
+      // Rd = add(Rs,#1)
+      // Rd = add(Rs,#-1)
+      if (isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() &&
+          ((MI->getOperand(2).getImm() == 1) ||
+           (MI->getOperand(2).getImm() == -1)))
+        return HexagonII::HSIG_A;
+    }
+    break;
+  case Hexagon::A2_add:
+    // Rx = add(Rx,Rs)
+    DstReg = MI->getOperand(0).getReg();
+    Src1Reg = MI->getOperand(1).getReg();
+    Src2Reg = MI->getOperand(2).getReg();
+    if (isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) &&
+        isIntRegForSubInst(Src2Reg))
+      return HexagonII::HSIG_A;
+    break;
+  case Hexagon::A2_andir:
+    // Same as zxtb.
+    // Rd16=and(Rs16,#255)
+    // Rd16=and(Rs16,#1)
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(1).getReg();
+    if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+        MI->getOperand(2).isImm() &&
+        ((MI->getOperand(2).getImm() == 1) ||
+         (MI->getOperand(2).getImm() == 255)))
+      return HexagonII::HSIG_A;
+    break;
+  case Hexagon::A2_tfr:
+    // Rd = Rs
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(1).getReg();
+    if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+      return HexagonII::HSIG_A;
+    break;
+  case Hexagon::A2_tfrsi:
+    // Rd = #u6
+    // Do not test for #u6 size since the constant is extended regardless
+    // and a compound could be formed.
+    // Rd = #-1
+    DstReg = MI->getOperand(0).getReg();
+    if (isIntRegForSubInst(DstReg))
+      return HexagonII::HSIG_A;
+    break;
+  case Hexagon::C2_cmoveit:
+  case Hexagon::C2_cmovenewit:
+  case Hexagon::C2_cmoveif:
+  case Hexagon::C2_cmovenewif:
+    // if ([!]P0[.new]) Rd = #0
+    // Actual form:
+    // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>;
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(1).getReg();
+    if (isIntRegForSubInst(DstReg) &&
+        Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::P0 == SrcReg &&
+        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0)
+      return HexagonII::HSIG_A;
+    break;
+  case Hexagon::C2_cmpeqi:
+    // P0 = cmp.eq(Rs,#u2)
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(1).getReg();
+    if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+        Hexagon::P0 == DstReg && isIntRegForSubInst(SrcReg) &&
+        MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm()))
+      return HexagonII::HSIG_A;
+    break;
+  case Hexagon::A2_combineii:
+  case Hexagon::A4_combineii:
+    // Rdd = combine(#u2,#U2)
+    DstReg = MI->getOperand(0).getReg();
+    if (isDblRegForSubInst(DstReg, HRI) &&
+        ((MI->getOperand(1).isImm() && isUInt<2>(MI->getOperand(1).getImm())) ||
+         (MI->getOperand(1).isGlobal() &&
+          isUInt<2>(MI->getOperand(1).getOffset()))) &&
+        ((MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) ||
+         (MI->getOperand(2).isGlobal() &&
+          isUInt<2>(MI->getOperand(2).getOffset()))))
+      return HexagonII::HSIG_A;
+    break;
+  case Hexagon::A4_combineri:
+    // Rdd = combine(Rs,#0)
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(1).getReg();
+    if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) &&
+        ((MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) ||
+         (MI->getOperand(2).isGlobal() && MI->getOperand(2).getOffset() == 0)))
+      return HexagonII::HSIG_A;
+    break;
+  case Hexagon::A4_combineir:
+    // Rdd = combine(#0,Rs)
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(2).getReg();
+    if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) &&
+        ((MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) ||
+         (MI->getOperand(1).isGlobal() && MI->getOperand(1).getOffset() == 0)))
+      return HexagonII::HSIG_A;
+    break;
+  case Hexagon::A2_sxtb:
+  case Hexagon::A2_sxth:
+  case Hexagon::A2_zxtb:
+  case Hexagon::A2_zxth:
+    // Rd = sxth/sxtb/zxtb/zxth(Rs)
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(1).getReg();
+    if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+      return HexagonII::HSIG_A;
+    break;
+  }
-  return false;
+  return HexagonII::HSIG_None;
 }
-bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
-  const uint64_t F = MI->getDesc().TSFlags;
-  unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
-  if (isExtended) // Instruction must be extended.
-    return true;
-  unsigned isExtendable =
-    (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
-  if (!isExtendable)
-    return false;
-
-  short ExtOpNum = getCExtOpNum(MI);
-  const MachineOperand &MO = MI->getOperand(ExtOpNum);
-  // Use MO operand flags to determine if MO
-  // has the HMOTF_ConstExtended flag set.
-  if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended)
-    return true;
-  // If this is a Machine BB address we are talking about, and it is
-  // not marked as extended, say so.
-  if (MO.isMBB())
-    return false;
-
-  // We could be using an instruction with an extendable immediate and shoehorn
-  // a global address into it. If it is a global address it will be constant
-  // extended. We do this for COMBINE.
-  // We currently only handle isGlobal() because it is the only kind of
-  // object we are going to end up with here for now.
-  // In the future we probably should add isSymbol(), etc.
-  if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() ||
-      MO.isJTI() || MO.isCPI())
-    return true;
-
-  // If the extendable operand is not 'Immediate' type, the instruction should
-  // have 'isExtended' flag set.
-  assert(MO.isImm() && "Extendable operand must be Immediate type");
+short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr *MI) const {
+  return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Real);
+}
-  int MinValue = getMinValue(MI);
-  int MaxValue = getMaxValue(MI);
-  int ImmValue = MO.getImm();
-  return (ImmValue < MinValue || ImmValue > MaxValue);
+// Return first non-debug instruction in the basic block.
+MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB)
+      const {
+  for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) {
+    MachineInstr *MI = &*MII;
+    if (MI->isDebugValue())
+      continue;
+    return MI;
+  }
+  return nullptr;
 }
-// Return the number of bytes required to encode the instruction.
-// Hexagon instructions are fixed length, 4 bytes, unless they
-// use a constant extender, which requires another 4 bytes.
-// For debug instructions and prolog labels, return 0.
-unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
-  if (MI->isDebugValue() || MI->isPosition())
-    return 0;
+unsigned HexagonInstrInfo::getInstrTimingClassLatency(
+      const InstrItineraryData *ItinData, const MachineInstr *MI) const {
+  // Default to one cycle for no itinerary. However, an "empty" itinerary may
+  // still have a MinLatency property, which getStageLatency checks.
+  if (!ItinData)
+    return getInstrLatency(ItinData, MI);
-  unsigned Size = MI->getDesc().getSize();
-  if (!Size)
-    // Assume the default insn size in case it cannot be determined
-    // for whatever reason.
-    Size = HEXAGON_INSTR_SIZE;
-
-  if (isConstExtended(MI) || isExtended(MI))
-    Size += HEXAGON_INSTR_SIZE;
-
-  return Size;
+  // Get the latency embedded in the itinerary. If we're not using timing
+  // class latencies or if we're using BSB scheduling, then restrict the
+  // maximum latency to 1 (that is, either 0 or 1).
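The clamp this comment describes is easy to state in isolation. A minimal standalone sketch of the rule, with illustrative names rather than the HexagonInstrInfo API: with timing-class latencies disabled, or with BSB scheduling in effect, anything longer than one cycle is treated as a single cycle, and transient instructions that emit no code cost nothing.

#include <cassert>
// Standalone model of the latency clamp described above (a sketch, not the
// real HexagonInstrInfo code).
static unsigned clampLatency(unsigned StageLatency, bool IsTransient,
                             bool UseTimingClasses, bool UseBSB) {
  if (IsTransient)
    return 0;                             // e.g. no-code instructions
  if (!UseTimingClasses || UseBSB)
    return StageLatency > 1 ? 1 : StageLatency;
  return StageLatency;                    // trust the timing class
}
int main() {
  assert(clampLatency(3, false, true, false) == 3);
  assert(clampLatency(3, false, false, false) == 1); // clamped
  assert(clampLatency(3, true, true, false) == 0);   // transient
}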
+ if (MI->isTransient()) + return 0; + unsigned Latency = ItinData->getStageLatency(MI->getDesc().getSchedClass()); + if (!EnableTimingClassLatency || + MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>(). + useBSBScheduling()) + if (Latency > 1) + Latency = 1; + return Latency; } -// Returns the opcode to use when converting MI, which is a conditional jump, -// into a conditional instruction which uses the .new value of the predicate. -// We also use branch probabilities to add a hint to the jump. -int -HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI, - const - MachineBranchProbabilityInfo *MBPI) const { - - // We assume that block can have at most two successors. - bool taken = false; - MachineBasicBlock *Src = MI->getParent(); - MachineOperand *BrTarget = &MI->getOperand(1); - MachineBasicBlock *Dst = BrTarget->getMBB(); - const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst); - if (Prediction >= BranchProbability(1,2)) - taken = true; +// inverts the predication logic. +// p -> NotP +// NotP -> P +bool HexagonInstrInfo::getInvertedPredSense( + SmallVectorImpl<MachineOperand> &Cond) const { + if (Cond.empty()) + return false; + unsigned Opc = getInvertedPredicatedOpcode(Cond[0].getImm()); + Cond[0].setImm(Opc); + return true; +} - switch (MI->getOpcode()) { - case Hexagon::J2_jumpt: - return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew; - case Hexagon::J2_jumpf: - return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew; - default: - llvm_unreachable("Unexpected jump instruction."); - } -} -// Returns true if a particular operand is extendable for an instruction. -bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, - unsigned short OperandNum) const { - const uint64_t F = MI->getDesc().TSFlags; +unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { + int InvPredOpcode; + InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc) + : Hexagon::getTruePredOpcode(Opc); + if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate. + return InvPredOpcode; - return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) - == OperandNum; + llvm_unreachable("Unexpected predicated instruction"); } -// Returns Operand Index for the constant extended instruction. -unsigned short HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); -} -// Returns the min value that doesn't need to be extended. -int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const { +// Returns the max value that doesn't need to be extended. +int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; unsigned isSigned = (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; @@ -1896,13 +3582,20 @@ int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const { & HexagonII::ExtentBitsMask; if (isSigned) // if value is signed - return -1U << (bits - 1); + return ~(-1U << (bits - 1)); else - return 0; + return ~(-1U << bits); } -// Returns the max value that doesn't need to be extended. -int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const { + +unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask; +} + + +// Returns the min value that doesn't need to be extended. 
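getMaxValue above and getMinValue just below derive the extent range with shift idioms, and that range is exactly what decides whether an immediate must be constant-extended (##imm). Incidentally, the removed isConstExtended earlier tests MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended; since the flag constant is nonzero, that fires for any set target flag, and a bitwise & appears to have been intended. A standalone sanity check of the range math, assuming a two's-complement target and 0 < bits < 32:

#include <cassert>
int main() {
  unsigned bits = 8;                   // e.g. a signed 8-bit extent field
  int Max = ~(-1U << (bits - 1));      //  127
  int Min = -1U << (bits - 1);         // -128
  assert(Max == 127 && Min == -128);
  int Imm = 200;                       // #200 does not fit s8...
  assert(Imm < Min || Imm > Max);      // ...so it must become ##200
  assert(int(~(-1U << bits)) == 255);  // unsigned extent: [0, 255]
}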
+int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; unsigned isSigned = (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; @@ -1910,49 +3603,14 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const { & HexagonII::ExtentBitsMask; if (isSigned) // if value is signed - return ~(-1U << (bits - 1)); + return -1U << (bits - 1); else - return ~(-1U << bits); + return 0; } -// Returns true if an instruction can be converted into a non-extended -// equivalent instruction. -bool HexagonInstrInfo::NonExtEquivalentExists (const MachineInstr *MI) const { - - short NonExtOpcode; - // Check if the instruction has a register form that uses register in place - // of the extended operand, if so return that as the non-extended form. - if (Hexagon::getRegForm(MI->getOpcode()) >= 0) - return true; - - if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) { - // Check addressing mode and retrieve non-ext equivalent instruction. - - switch (getAddrMode(MI)) { - case HexagonII::Absolute : - // Load/store with absolute addressing mode can be converted into - // base+offset mode. - NonExtOpcode = Hexagon::getBasedWithImmOffset(MI->getOpcode()); - break; - case HexagonII::BaseImmOffset : - // Load/store with base+offset addressing mode can be converted into - // base+register offset addressing mode. However left shift operand should - // be set to 0. - NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode()); - break; - default: - return false; - } - if (NonExtOpcode < 0) - return false; - return true; - } - return false; -} // Returns opcode of the non-extended equivalent instruction. -short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { - +short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const { // Check if the instruction has a register form that uses register in place // of the extended operand, if so return that as the non-extended form. short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode()); @@ -1963,9 +3621,12 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { // Check addressing mode and retrieve non-ext equivalent instruction. 
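A sketch of how a caller, for instance an extender-removal pass, might use this query once the switch below has picked the demoted addressing mode; HII and MI are assumed to be in scope, and the operands would still have to be rewritten to match the new form (illustrative, not code from this tree):

short NewOpc = HII.getNonExtOpcode(MI);
if (NewOpc >= 0)
  MI->setDesc(HII.get(NewOpc)); // caller must also fix up the operands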
switch (getAddrMode(MI)) { case HexagonII::Absolute : - return Hexagon::getBasedWithImmOffset(MI->getOpcode()); + return Hexagon::getBaseWithImmOffset(MI->getOpcode()); case HexagonII::BaseImmOffset : return Hexagon::getBaseWithRegOffset(MI->getOpcode()); + case HexagonII::BaseLongOffset: + return Hexagon::getRegShlForm(MI->getOpcode()); + default: return -1; } @@ -1973,29 +3634,9 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { return -1; } -bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const { - return (Opcode == Hexagon::J2_jumpt) || - (Opcode == Hexagon::J2_jumpf) || - (Opcode == Hexagon::J2_jumptnewpt) || - (Opcode == Hexagon::J2_jumpfnewpt) || - (Opcode == Hexagon::J2_jumpt) || - (Opcode == Hexagon::J2_jumpf); -} - -bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const { - if (Cond.empty() || !isPredicated(Cond[0].getImm())) - return false; - return !isPredicatedTrue(Cond[0].getImm()); -} - -bool HexagonInstrInfo::isEndLoopN(Opcode_t Opcode) const { - return (Opcode == Hexagon::ENDLOOP0 || - Opcode == Hexagon::ENDLOOP1); -} bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond, - unsigned &PredReg, unsigned &PredRegPos, - unsigned &PredRegFlags) const { + unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const { if (Cond.empty()) return false; assert(Cond.size() == 2); @@ -2014,3 +3655,174 @@ bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond, return true; } + +short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Pseudo); +} + + +short HexagonInstrInfo::getRegForm(const MachineInstr *MI) const { + return Hexagon::getRegForm(MI->getOpcode()); +} + + +// Return the number of bytes required to encode the instruction. +// Hexagon instructions are fixed length, 4 bytes, unless they +// use a constant extender, which requires another 4 bytes. +// For debug instructions and prolog labels, return 0. +unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const { + if (MI->isDebugValue() || MI->isPosition()) + return 0; + + unsigned Size = MI->getDesc().getSize(); + if (!Size) + // Assume the default insn size in case it cannot be determined + // for whatever reason. + Size = HEXAGON_INSTR_SIZE; + + if (isConstExtended(MI) || isExtended(MI)) + Size += HEXAGON_INSTR_SIZE; + + // Try and compute number of instructions in asm. + if (BranchRelaxAsmLarge && MI->getOpcode() == Hexagon::INLINEASM) { + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); + + // Count the number of register definitions to find the asm string. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + // Disassemble the AsmStr and approximate number of instructions. 
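The overall rule getSize implements can be modeled without any LLVM types: one 4-byte word per instruction, one extra word for a constant extender, zero for metadata, and a string-based estimate (getInlineAsmLength) for inline asm. A minimal standalone model:

#include <cassert>
// Standalone sketch of the size rule; the real code also measures INLINEASM
// by scanning the asm string against the target MCAsmInfo.
static unsigned modelSize(bool IsMeta, bool HasExtender) {
  if (IsMeta)
    return 0;                  // debug values, labels and other positions
  return HasExtender ? 8 : 4;  // HEXAGON_INSTR_SIZE, plus one extender word
}
int main() {
  assert(modelSize(false, true) == 8);  // e.g. r0 = add(r1, ##0x12345678)
  assert(modelSize(true, false) == 0);
}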
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + Size = getInlineAsmLength(AsmStr, *MAI); + } + + return Size; +} + + +uint64_t HexagonInstrInfo::getType(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::TypePos) & HexagonII::TypeMask; +} + + +unsigned HexagonInstrInfo::getUnits(const MachineInstr* MI) const { + const TargetSubtargetInfo &ST = MI->getParent()->getParent()->getSubtarget(); + const InstrItineraryData &II = *ST.getInstrItineraryData(); + const InstrStage &IS = *II.beginStage(MI->getDesc().getSchedClass()); + + return IS.getUnits(); +} + + +unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; +} + + +// Calculate size of the basic block without debug instructions. +unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { + return nonDbgMICount(BB->instr_begin(), BB->instr_end()); +} + + +unsigned HexagonInstrInfo::nonDbgBundleSize( + MachineBasicBlock::const_iterator BundleHead) const { + assert(BundleHead->isBundle() && "Not a bundle header"); + auto MII = BundleHead.getInstrIterator(); + // Skip the bundle header. + return nonDbgMICount(++MII, getBundleEnd(BundleHead)); +} + + +/// immediateExtend - Changes the instruction in place to one using an immediate +/// extender. +void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const { + assert((isExtendable(MI)||isConstExtended(MI)) && + "Instruction must be extendable"); + // Find which operand is extendable. + short ExtOpNum = getCExtOpNum(MI); + MachineOperand &MO = MI->getOperand(ExtOpNum); + // This needs to be something we understand. + assert((MO.isMBB() || MO.isImm()) && + "Branch with unknown extendable field type"); + // Mark given operand as extended. + MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); +} + + +bool HexagonInstrInfo::invertAndChangeJumpTarget( + MachineInstr* MI, MachineBasicBlock* NewTarget) const { + DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#" + << NewTarget->getNumber(); MI->dump();); + assert(MI->isBranch()); + unsigned NewOpcode = getInvertedPredicatedOpcode(MI->getOpcode()); + int TargetPos = MI->getNumOperands() - 1; + // In general branch target is the last operand, + // but some implicit defs added at the end might change it. + while ((TargetPos > -1) && !MI->getOperand(TargetPos).isMBB()) + --TargetPos; + assert((TargetPos >= 0) && MI->getOperand(TargetPos).isMBB()); + MI->getOperand(TargetPos).setMBB(NewTarget); + if (EnableBranchPrediction && isPredicatedNew(MI)) { + NewOpcode = reversePrediction(NewOpcode); + } + MI->setDesc(get(NewOpcode)); + return true; +} + + +void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { + /* +++ The code below is used to generate complete set of Hexagon Insn +++ */ + MachineFunction::iterator A = MF.begin(); + MachineBasicBlock &B = *A; + MachineBasicBlock::iterator I = B.begin(); + MachineInstr *MI = &*I; + DebugLoc DL = MI->getDebugLoc(); + MachineInstr *NewMI; + + for (unsigned insn = TargetOpcode::GENERIC_OP_END+1; + insn < Hexagon::INSTRUCTION_LIST_END; ++insn) { + NewMI = BuildMI(B, MI, DL, get(insn)); + DEBUG(dbgs() << "\n" << getName(NewMI->getOpcode()) << + " Class: " << NewMI->getDesc().getSchedClass()); + NewMI->eraseFromParent(); + } + /* --- The code above is used to generate complete set of Hexagon Insn --- */ +} + + +// inverts the predication logic. 
+// p -> NotP +// NotP -> P +bool HexagonInstrInfo::reversePredSense(MachineInstr* MI) const { + DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI->dump()); + MI->setDesc(get(getInvertedPredicatedOpcode(MI->getOpcode()))); + return true; +} + + +// Reverse the branch prediction. +unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { + int PredRevOpcode = -1; + if (isPredictedTaken(Opcode)) + PredRevOpcode = Hexagon::notTakenBranchPrediction(Opcode); + else + PredRevOpcode = Hexagon::takenBranchPrediction(Opcode); + assert(PredRevOpcode > 0); + return PredRevOpcode; +} + + +// TODO: Add more rigorous validation. +bool HexagonInstrInfo::validateBranchCond(const ArrayRef<MachineOperand> &Cond) + const { + return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index d0b8a4631c1d..9530d9f2aa0d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -1,4 +1,3 @@ - //===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure @@ -28,23 +27,18 @@ namespace llvm { struct EVT; class HexagonSubtarget; + class HexagonInstrInfo : public HexagonGenInstrInfo { virtual void anchor(); const HexagonRegisterInfo RI; - const HexagonSubtarget &Subtarget; public: - typedef unsigned Opcode_t; - explicit HexagonInstrInfo(HexagonSubtarget &ST); - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). + /// TargetInstrInfo overrides. /// - const HexagonRegisterInfo &getRegisterInfo() const { return RI; } - /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has @@ -52,7 +46,7 @@ public: unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const override; - /// isStoreToStackSlot - If the specified machine instruction is a direct + /// If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has @@ -60,50 +54,118 @@ public: unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const override; - + /// Analyze the branching code at the end of MBB, returning + /// true if it cannot be understood (e.g. it's a switch dispatch or isn't + /// implemented for a target). Upon success, this returns false and returns + /// with the following information in various cases: + /// + /// 1. If this block ends with no branches (it just falls through to its succ) + /// just return false, leaving TBB/FBB null. + /// 2. If this block ends with only an unconditional branch, it sets TBB to be + /// the destination block. + /// 3. If this block ends with a conditional branch and it falls through to a + /// successor block, it sets TBB to be the branch destination block and a + /// list of operands that evaluate the condition. 
These operands can be
+  /// passed to other TargetInstrInfo methods to create new branches.
+  /// 4. If this block ends with a conditional branch followed by an
+  ///    unconditional branch, it returns the 'true' destination in TBB, the
+  ///    'false' destination in FBB, and a list of operands that evaluate the
+  ///    condition. These operands can be passed to other TargetInstrInfo
+  ///    methods to create new branches.
+  ///
+  /// Note that RemoveBranch and InsertBranch must be implemented to support
+  /// cases where this method returns success.
+  ///
+  /// If AllowModify is true, then this routine is allowed to modify the basic
+  /// block (e.g. delete instructions after the unconditional branch).
+  ///
   bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify) const override;
+  /// Remove the branching code at the end of the specific MBB.
+  /// This is only invoked in cases where AnalyzeBranch returns success. It
+  /// returns the number of instructions that were removed.
   unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+  /// Insert branch code into the end of the specified MachineBasicBlock.
+  /// The operands to this method are the same as those
+  /// returned by AnalyzeBranch. This is only invoked in cases where
+  /// AnalyzeBranch returns success. It returns the number of instructions
+  /// inserted.
+  ///
+  /// It is also invoked by tail merging to add unconditional branches in
+  /// cases where AnalyzeBranch doesn't apply because there was no original
+  /// branch to analyze. At least this much must be implemented, else tail
+  /// merging needs to be disabled.
   unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB,
                         ArrayRef<MachineOperand> Cond,
                         DebugLoc DL) const override;
-  bool analyzeCompare(const MachineInstr *MI,
-                      unsigned &SrcReg, unsigned &SrcReg2,
-                      int &Mask, int &Value) const override;
+  /// Return true if it's profitable to predicate
+  /// instructions with accumulated instruction latency of "NumCycles"
+  /// of the specified basic block, where the probability of the instructions
+  /// being executed is given by Probability, and Confidence is a measure
+  /// of our confidence that it will be properly predicted.
+  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+                           unsigned ExtraPredCycles,
+                           BranchProbability Probability) const override;
+
+  /// Second variant of isProfitableToIfCvt. This one
+  /// checks for the case where two basic blocks from the true and false paths
+  /// of an if-then-else (diamond) are predicated on mutually exclusive
+  /// predicates, where the probability of the true path being taken is given
+  /// by Probability, and Confidence is a measure of our confidence that it
+  /// will be properly predicted.
+  bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                           unsigned NumTCycles, unsigned ExtraTCycles,
+                           MachineBasicBlock &FMBB,
+                           unsigned NumFCycles, unsigned ExtraFCycles,
+                           BranchProbability Probability) const override;
+
+  /// Return true if it's profitable for if-converter to duplicate instructions
+  /// of specified accumulated instruction latencies in the specified MBB to
+  /// enable if-conversion.
+  /// The probability of the instructions being executed is given by
+  /// Probability, and Confidence is a measure of our confidence that it
+  /// will be properly predicted.
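As a usage note on the AnalyzeBranch contract documented above (the remaining if-conversion declarations continue below), a client typically distinguishes the four outcomes like this. TII and MBB are assumed to be in scope; this is a sketch, not code from this tree:

MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII.AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
  if (!TBB) {
    // Case 1: the block falls through to its layout successor.
  } else if (Cond.empty()) {
    // Case 2: an unconditional branch to TBB.
  } else if (!FBB) {
    // Case 3: a conditional branch to TBB, fall-through otherwise.
  } else {
    // Case 4: a conditional branch to TBB, then an unconditional one to FBB.
  }
}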
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + BranchProbability Probability) const override; + /// Emit instructions to copy a pair of physical registers. + /// + /// This function should support copies within any legal register class as + /// well as any cross-class copies created during instruction selection. + /// + /// The source and destination registers may overlap, which may require a + /// careful implementation when multiple copy instructions are required for + /// large registers. See for example the ARM target. void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; + /// Store the specified register of the given register class to the specified + /// stack frame index. The store instruction is to be added to the given + /// machine basic block before the specified machine instruction. If isKill + /// is true, the register operand is the last use and must be marked kill. void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; - + /// Load the specified register of the given register class from the specified + /// stack frame index. The load instruction is to be added to the given + /// machine basic block before the specified machine instruction. void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; - - /// expandPostRAPseudo - This function is called for all pseudo instructions + /// This function is called for all pseudo instructions /// that remain after register allocation. Many pseudo instructions are /// created to help register allocation. This is the place to convert them /// into real instructions. The target can edit MI in place, or it can insert @@ -111,122 +173,228 @@ public: /// anything was changed. bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, - int FrameIndex) const override; + /// Reverses the branch condition of the specified condition list, + /// returning false on success and true if it cannot be reversed. + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) + const override; - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, - MachineInstr *LoadMI) const override { - return nullptr; - } + /// Insert a noop into the instruction stream at the specified point. + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override; - unsigned createVR(MachineFunction* MF, MVT VT) const; + /// Returns true if the instruction is already predicated. 
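The spill hooks above come in the usual save/restore pairing; an illustrative sketch, assuming TII, MBB, MI, RestorePt, Reg, FI, RC and TRI are all in scope and FI is a stack slot sized for RC:

// Spill Reg before MI, then reload it at RestorePt (a sketch).
TII.storeRegToStackSlot(MBB, MI, Reg, /*isKill=*/true, FI, RC, TRI);
// ... instructions that are free to clobber Reg go here ...
TII.loadRegFromStackSlot(MBB, RestorePt, Reg, FI, RC, TRI);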
+ bool isPredicated(const MachineInstr *MI) const override; - bool isBranch(const MachineInstr *MI) const; - bool isPredicable(MachineInstr *MI) const override; + /// Convert the instruction into a predicated instruction. + /// It returns true if the operation was successful. bool PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Cond) const override; - bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - unsigned ExtraPredCycles, - const BranchProbability &Probability) const override; - - bool isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumTCycles, unsigned ExtraTCycles, - MachineBasicBlock &FMBB, - unsigned NumFCycles, unsigned ExtraFCycles, - const BranchProbability &Probability) const override; + /// Returns true if the first specified predicate + /// subsumes the second, e.g. GE subsumes GT. + bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1, + ArrayRef<MachineOperand> Pred2) const override; - bool isPredicated(const MachineInstr *MI) const override; - bool isPredicated(unsigned Opcode) const; - bool isPredicatedTrue(const MachineInstr *MI) const; - bool isPredicatedTrue(unsigned Opcode) const; - bool isPredicatedNew(const MachineInstr *MI) const; - bool isPredicatedNew(unsigned Opcode) const; + /// If the specified instruction defines any predicate + /// or condition code register(s) used for predication, returns true as well + /// as the definition predicate(s) by reference. bool DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const override; - bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1, - ArrayRef<MachineOperand> Pred2) const override; - bool - ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; + /// Return true if the specified instruction can be predicated. + /// By default, this returns true for every instruction with a + /// PredicateOperand. + bool isPredicable(MachineInstr *MI) const override; - bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - const BranchProbability &Probability) const override; + /// Test if the given instruction should be considered a scheduling boundary. + /// This primarily includes labels and terminators. + bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const override; + /// Measure the specified inline asm to determine an approximation of its + /// length. + unsigned getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const override; + + /// Allocate and return a hazard recognizer to use for this target when + /// scheduling the machine instructions after register allocation. + ScheduleHazardRecognizer* + CreateTargetPostRAHazardRecognizer(const InstrItineraryData*, + const ScheduleDAG *DAG) const override; + + /// For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const override; + + /// Compute the instruction latency of a given instruction. + /// If the instruction has higher cost when predicated, it's returned via + /// PredCost. + unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = 0) const override; + + /// Create machine specific model for scheduling. 
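A short usage sketch for the latency query declared above (ItinData, TII and MI assumed in scope; illustrative only):

unsigned PredCost = 0;
unsigned Latency = TII.getInstrLatency(ItinData, MI, &PredCost);
// Latency is the cost of the plain form; if the predicated form is more
// expensive, that extra cost comes back through PredCost.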
DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override; - bool isSchedulingBoundary(const MachineInstr *MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const override; - bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; - bool isValidAutoIncImm(const EVT VT, const int Offset) const; - bool isMemOp(const MachineInstr *MI) const; - bool isSpillPredRegOp(const MachineInstr *MI) const; - bool isU6_3Immediate(const int value) const; - bool isU6_2Immediate(const int value) const; - bool isU6_1Immediate(const int value) const; - bool isU6_0Immediate(const int value) const; - bool isS4_3Immediate(const int value) const; - bool isS4_2Immediate(const int value) const; - bool isS4_1Immediate(const int value) const; - bool isS4_0Immediate(const int value) const; - bool isS12_Immediate(const int value) const; - bool isU6_Immediate(const int value) const; - bool isS8_Immediate(const int value) const; - bool isS6_Immediate(const int value) const; - - bool isSaveCalleeSavedRegsCall(const MachineInstr* MI) const; - bool isConditionalTransfer(const MachineInstr* MI) const; + // Sometimes, it is possible for the target + // to tell, even without aliasing information, that two MIs access different + // memory addresses. This function returns true if two MIs access different + // memory addresses and false otherwise. + bool areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb, + AliasAnalysis *AA = nullptr) + const override; + + + /// HexagonInstrInfo specifics. + /// + + const HexagonRegisterInfo &getRegisterInfo() const { return RI; } + + unsigned createVR(MachineFunction* MF, MVT VT) const; + + bool isAbsoluteSet(const MachineInstr* MI) const; + bool isAccumulator(const MachineInstr *MI) const; + bool isComplex(const MachineInstr *MI) const; + bool isCompoundBranchInstr(const MachineInstr *MI) const; + bool isCondInst(const MachineInstr *MI) const; bool isConditionalALU32 (const MachineInstr* MI) const; - bool isConditionalLoad (const MachineInstr* MI) const; + bool isConditionalLoad(const MachineInstr* MI) const; bool isConditionalStore(const MachineInstr* MI) const; - bool isNewValueInst(const MachineInstr* MI) const; - bool isNewValue(const MachineInstr* MI) const; - bool isNewValue(Opcode_t Opcode) const; - bool isDotNewInst(const MachineInstr* MI) const; - int GetDotOldOp(const int opc) const; - int GetDotNewOp(const MachineInstr* MI) const; - int GetDotNewPredOp(MachineInstr *MI, - const MachineBranchProbabilityInfo - *MBPI) const; - bool mayBeNewStore(const MachineInstr* MI) const; + bool isConditionalTransfer(const MachineInstr* MI) const; + bool isConstExtended(const MachineInstr *MI) const; bool isDeallocRet(const MachineInstr *MI) const; - unsigned getInvertedPredicatedOpcode(const int Opc) const; + bool isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + bool isDotCurInst(const MachineInstr* MI) const; + bool isDotNewInst(const MachineInstr* MI) const; + bool isDuplexPair(const MachineInstr *MIa, const MachineInstr *MIb) const; + bool isEarlySourceInstr(const MachineInstr *MI) const; + bool isEndLoopN(unsigned Opcode) const; + bool isExpr(unsigned OpType) const; bool isExtendable(const MachineInstr* MI) const; bool isExtended(const MachineInstr* MI) const; - bool isPostIncrement(const MachineInstr* MI) const; + bool isFloat(const MachineInstr *MI) const; + bool isHVXMemWithAIndirect(const MachineInstr *I, + const MachineInstr *J) const; + bool isIndirectCall(const MachineInstr 
*MI) const; + bool isIndirectL4Return(const MachineInstr *MI) const; + bool isJumpR(const MachineInstr *MI) const; + bool isJumpWithinBranchRange(const MachineInstr *MI, unsigned offset) const; + bool isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI, + const MachineInstr *ESMI) const; + bool isLateResultInstr(const MachineInstr *MI) const; + bool isLateSourceInstr(const MachineInstr *MI) const; + bool isLoopN(const MachineInstr *MI) const; + bool isMemOp(const MachineInstr *MI) const; + bool isNewValue(const MachineInstr* MI) const; + bool isNewValue(unsigned Opcode) const; + bool isNewValueInst(const MachineInstr* MI) const; + bool isNewValueJump(const MachineInstr* MI) const; + bool isNewValueJump(unsigned Opcode) const; bool isNewValueStore(const MachineInstr* MI) const; bool isNewValueStore(unsigned Opcode) const; - bool isNewValueJump(const MachineInstr* MI) const; - bool isNewValueJump(Opcode_t Opcode) const; - bool isNewValueJumpCandidate(const MachineInstr *MI) const; + bool isOperandExtended(const MachineInstr *MI, unsigned OperandNum) const; + bool isPostIncrement(const MachineInstr* MI) const; + bool isPredicatedNew(const MachineInstr *MI) const; + bool isPredicatedNew(unsigned Opcode) const; + bool isPredicatedTrue(const MachineInstr *MI) const; + bool isPredicatedTrue(unsigned Opcode) const; + bool isPredicated(unsigned Opcode) const; + bool isPredicateLate(unsigned Opcode) const; + bool isPredictedTaken(unsigned Opcode) const; + bool isSaveCalleeSavedRegsCall(const MachineInstr *MI) const; + bool isSolo(const MachineInstr* MI) const; + bool isSpillPredRegOp(const MachineInstr *MI) const; + bool isTC1(const MachineInstr *MI) const; + bool isTC2(const MachineInstr *MI) const; + bool isTC2Early(const MachineInstr *MI) const; + bool isTC4x(const MachineInstr *MI) const; + bool isV60VectorInstruction(const MachineInstr *MI) const; + bool isValidAutoIncImm(const EVT VT, const int Offset) const; + bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; + bool isVecAcc(const MachineInstr *MI) const; + bool isVecALU(const MachineInstr *MI) const; + bool isVecUsableNextPacket(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + + + bool canExecuteInBundle(const MachineInstr *First, + const MachineInstr *Second) const; + bool hasEHLabel(const MachineBasicBlock *B) const; + bool hasNonExtEquivalent(const MachineInstr *MI) const; + bool hasPseudoInstrPair(const MachineInstr *MI) const; + bool hasUncondBranch(const MachineBasicBlock *B) const; + bool mayBeCurLoad(const MachineInstr* MI) const; + bool mayBeNewStore(const MachineInstr* MI) const; + bool producesStall(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + bool producesStall(const MachineInstr *MI, + MachineBasicBlock::const_instr_iterator MII) const; + bool predCanBeUsedAsDotNew(const MachineInstr *MI, unsigned PredReg) const; + bool PredOpcodeHasJMP_c(unsigned Opcode) const; + bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const; - void immediateExtend(MachineInstr *MI) const; - bool isConstExtended(const MachineInstr *MI) const; - unsigned getSize(const MachineInstr *MI) const; - int getDotNewPredJumpOp(MachineInstr *MI, - const MachineBranchProbabilityInfo *MBPI) const; unsigned getAddrMode(const MachineInstr* MI) const; - bool isOperandExtended(const MachineInstr *MI, - unsigned short OperandNum) const; - unsigned short getCExtOpNum(const MachineInstr *MI) const; - int getMinValue(const MachineInstr *MI) const; + unsigned getBaseAndOffset(const MachineInstr *MI, int 
&Offset,
+                            unsigned &AccessSize) const;
+  bool getBaseAndOffsetPosition(const MachineInstr *MI, unsigned &BasePos,
+                                unsigned &OffsetPos) const;
+  SmallVector<MachineInstr*,2> getBranchingInstrs(MachineBasicBlock& MBB) const;
+  unsigned getCExtOpNum(const MachineInstr *MI) const;
+  HexagonII::CompoundGroup
+  getCompoundCandidateGroup(const MachineInstr *MI) const;
+  unsigned getCompoundOpcode(const MachineInstr *GA,
+                             const MachineInstr *GB) const;
+  int getCondOpcode(int Opc, bool sense) const;
+  int getDotCurOp(const MachineInstr* MI) const;
+  int getDotNewOp(const MachineInstr* MI) const;
+  int getDotNewPredJumpOp(const MachineInstr *MI,
+                          const MachineBranchProbabilityInfo *MBPI) const;
+  int getDotNewPredOp(const MachineInstr *MI,
+                      const MachineBranchProbabilityInfo *MBPI) const;
+  int getDotOldOp(const int opc) const;
+  HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr *MI)
+                                 const;
+  short getEquivalentHWInstr(const MachineInstr *MI) const;
+  MachineInstr *getFirstNonDbgInst(MachineBasicBlock *BB) const;
+  /// getInstrTimingClassLatency - Compute the instruction latency of a given
+  /// instruction using Timing Class information, if available.
+  unsigned getInstrTimingClassLatency(const InstrItineraryData *ItinData,
+                                      const MachineInstr *MI) const;
+  bool getInvertedPredSense(SmallVectorImpl<MachineOperand> &Cond) const;
+  unsigned getInvertedPredicatedOpcode(const int Opc) const;
   int getMaxValue(const MachineInstr *MI) const;
-  bool NonExtEquivalentExists (const MachineInstr *MI) const;
+  unsigned getMemAccessSize(const MachineInstr* MI) const;
+  int getMinValue(const MachineInstr *MI) const;
   short getNonExtOpcode(const MachineInstr *MI) const;
-  bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
-  bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
-  bool isEndLoopN(Opcode_t Opcode) const;
   bool getPredReg(ArrayRef<MachineOperand> Cond, unsigned &PredReg,
                   unsigned &PredRegPos, unsigned &PredRegFlags) const;
-  int getCondOpcode(int Opc, bool sense) const;
+  short getPseudoInstrPair(const MachineInstr *MI) const;
+  short getRegForm(const MachineInstr *MI) const;
+  unsigned getSize(const MachineInstr *MI) const;
+  uint64_t getType(const MachineInstr* MI) const;
+  unsigned getUnits(const MachineInstr* MI) const;
+  unsigned getValidSubTargets(const unsigned Opcode) const;
+
+  unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;
+  unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const;
+
+
+  void immediateExtend(MachineInstr *MI) const;
+  bool invertAndChangeJumpTarget(MachineInstr* MI,
+                                 MachineBasicBlock* NewTarget) const;
+  void genAllInsnTimingClasses(MachineFunction &MF) const;
+  bool reversePredSense(MachineInstr* MI) const;
+  unsigned reversePrediction(unsigned Opcode) const;
+  bool validateBranchCond(const ArrayRef<MachineOperand> &Cond) const;
 };
 }
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
index 3b32c10ed5b0..5cfeba720d90 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -13,7 +13,7 @@
 include "HexagonInstrFormats.td"
 include "HexagonOperands.td"
-
+include "HexagonInstrEnc.td"
 // Pattern fragment that combines the value type and the register class
 // into a single parameter.
// The pat frags in the definitions below need to have a named register, @@ -1426,9 +1426,6 @@ def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; -def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; -def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; - class CondStr<string CReg, bit True, bit New> { string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") "; } @@ -1606,8 +1603,6 @@ def EH_RETURN_JMPR : T_JMPr; def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>; -def: Pat<(HexagonBR_JT (i32 IntRegs:$dst)), - (J2_jumpr IntRegs:$dst)>; def: Pat<(brind (i32 IntRegs:$dst)), (J2_jumpr IntRegs:$dst)>; @@ -2825,7 +2820,7 @@ let CextOpcode = "ADD_acc" in { let isExtentSigned = 1 in def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext, [(set (i32 IntRegs:$dst), - (add (add (i32 IntRegs:$src2), s16_16ImmPred:$src3), + (add (add (i32 IntRegs:$src2), s32ImmPred:$src3), (i32 IntRegs:$src1)))]>, ImmRegRel; def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, @@ -2859,7 +2854,7 @@ class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp> def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>; def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>; -def : T_MType_acc_pat1 <M2_naccii, add, sub, s16_16ImmPred>; +def : T_MType_acc_pat1 <M2_naccii, add, sub, s32ImmPred>; def : T_MType_acc_pat2 <M2_nacci, add, sub>; //===----------------------------------------------------------------------===// @@ -3303,7 +3298,8 @@ class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp, !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, /* s4_0Imm */ offset{3-0}))); - let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1); + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1)); let IClass = 0b1010; @@ -3322,7 +3318,7 @@ class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp, //===----------------------------------------------------------------------===// let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp, - bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew > + bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew> : STInst <(outs IntRegs:$_dst_), (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -3341,7 +3337,8 @@ class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp, !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, /* s4_0Imm */ offset{3-0}))); - let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1); + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1)); let isPredicatedNew = isPredNew; let isPredicatedFalse = isPredNot; @@ -3404,7 +3401,6 @@ def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>; //===----------------------------------------------------------------------===// // Template class for post increment stores with register offset. 
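Several of the store formats in this file set isNVStorable with the same !if chain; restated as a standalone C++ predicate (a sketch, with illustrative names):

// A store can take a .new (new-value) source only when it stores a full
// 32-bit register: memd (64-bit) and Rt.h (upper-half) forms opt out.
static bool canBeNewValueStore(bool IsDoubleWord, bool StoresHighHalf) {
  return !IsDoubleWord && !StoresHighHalf;
}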
//===----------------------------------------------------------------------===// -let isNVStorable = 1 in class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> : STInst <(outs IntRegs:$_dst_), @@ -3416,6 +3412,9 @@ class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp, bits<5> src3; let accessSize = AccessSz; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, !if(isHalf,0,1)); + let IClass = 0b1010; let Inst{27-24} = 0b1101; @@ -3430,12 +3429,11 @@ def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>; def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>; def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>; def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>; - def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>; let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp, - bits<3>MajOp, bit isH = 0> + bits<3> MajOp, bit isH = 0> : STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), mnemonic#"($src1+#$src2) = $src3"#!if(isH,".h","")>, @@ -3455,6 +3453,8 @@ class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp, !if (!eq(ImmOpStr, "s11_2Ext"), src2{12-2}, !if (!eq(ImmOpStr, "s11_1Ext"), src2{11-1}, /* s11_0Ext */ src2{10-0}))); + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); let IClass = 0b1010; let Inst{27} = 0b0; @@ -3494,7 +3494,10 @@ class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp, !if (!eq(ImmOpStr, "u6_2Ext"), src3{7-2}, !if (!eq(ImmOpStr, "u6_1Ext"), src3{6-1}, /* u6_0Ext */ src3{5-0}))); - let IClass = 0b0100; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); + + let IClass = 0b0100; let Inst{27} = 0b0; let Inst{26} = PredNot; @@ -3508,7 +3511,7 @@ class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp, let Inst{1-0} = src1; } -let isExtendable = 1, isNVStorable = 1, hasSideEffects = 0 in +let isExtendable = 1, hasSideEffects = 0 in multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC, Operand ImmOp, Operand predImmOp, bits<3> MajOp, bit isH = 0> { let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { @@ -3665,7 +3668,7 @@ def S2_allocframe: ST0Inst < // S2_storer[bhwdf]_pci: Store byte/half/word/double. 
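The offset slicing in T_store_io above (src2{12-2} for s11_2Ext, {11-1} for s11_1Ext, and so on) always encodes a scaled, naturally aligned signed offset. A standalone model of the word case, assuming two's-complement arithmetic shifts:

#include <cassert>
#include <cstdint>
// Encode an s11_2 offset field: word-aligned, scaled value fits in 11 bits.
static uint32_t encodeS11_2(int32_t Off) {
  assert((Off & 3) == 0 && "offset must be word-aligned");
  int32_t Scaled = Off >> 2;
  assert(Scaled >= -(1 << 10) && Scaled < (1 << 10) && "needs an extender");
  return uint32_t(Scaled) & 0x7ff; // these bits land in Inst as offset{12-2}
}
int main() { assert(encodeS11_2(-4) == 0x7ff); }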
// S2_storer[bhwdf]_pci -> S2_storerbnew_pci -let Uses = [CS], isNVStorable = 1 in +let Uses = [CS] in class T_store_pci <string mnemonic, RegisterClass RC, Operand Imm, bits<4>MajOp, MemAccessSize AlignSize, string RegSrc = "Rt"> @@ -3679,6 +3682,8 @@ class T_store_pci <string mnemonic, RegisterClass RC, bits<1> Mu; bits<5> Rt; let accessSize = AlignSize; + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, + !if(!eq(RegSrc,"Rt.h"), 0, 1)); let IClass = 0b1010; let Inst{27-25} = 0b100; @@ -3696,15 +3701,15 @@ class T_store_pci <string mnemonic, RegisterClass RC, } def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000, - ByteAccess>; + ByteAccess>; def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010, - HalfWordAccess>; + HalfWordAccess>; def S2_storerf_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1011, - HalfWordAccess, "Rt.h">; + HalfWordAccess, "Rt.h">; def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100, - WordAccess>; + WordAccess>; def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110, - DoubleWordAccess>; + DoubleWordAccess>; let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in class T_storenew_pci <string mnemonic, Operand Imm, @@ -3762,7 +3767,7 @@ def S2_storerd_pci_pseudo : T_store_pci_pseudo <"memd", DoubleRegs>; //===----------------------------------------------------------------------===// // Circular stores with auto-increment register //===----------------------------------------------------------------------===// -let Uses = [CS], isNVStorable = 1 in +let Uses = [CS] in class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp, MemAccessSize AlignSize, string RegSrc = "Rt"> : STInst <(outs IntRegs:$_dst_), @@ -3775,6 +3780,8 @@ class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp, bits<5> Rt; let accessSize = AlignSize; + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, + !if(!eq(RegSrc,"Rt.h"), 0, 1)); let IClass = 0b1010; let Inst{27-25} = 0b100; @@ -5784,7 +5791,19 @@ include "HexagonInstrInfoV5.td" //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// +// V60 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV60.td" + +//===----------------------------------------------------------------------===// +// V60 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// // ALU32/64/Vector + //===----------------------------------------------------------------------===/// include "HexagonInstrInfoVector.td" + +include "HexagonInstrAlias.td" diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td index 65b0f4974367..87d6b359f5fb 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -684,7 +684,7 @@ def: Pat<(i64 (zext (i32 IntRegs:$src1))), // Template class for store instructions with Absolute set addressing mode. 
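The circular stores above pick their immediate operand (s4_0Imm through s4_3Imm) by access size: the 4-bit signed increment is scaled by the element width. A standalone range check capturing that rule (a sketch):

#include <cassert>
#include <cstdint>
// True if Off is usable as an s4_N post-increment for a 2^Log2Bytes access.
static bool fitsS4Scaled(int32_t Off, unsigned Log2Bytes) {
  if (Off & ((1 << Log2Bytes) - 1))    // must be naturally aligned
    return false;
  int32_t Scaled = Off >> Log2Bytes;
  return Scaled >= -8 && Scaled <= 7;  // signed 4-bit field
}
int main() {
  assert(fitsS4Scaled(56, 3));   // memd: 56 == 7 << 3
  assert(!fitsS4Scaled(64, 3));  // one step out of range for s4_3
}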
//===----------------------------------------------------------------------===// let isExtended = 1, opExtendable = 1, opExtentBits = 6, - addrMode = AbsoluteSet, isNVStorable = 1 in + addrMode = AbsoluteSet in class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC, bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> : STInst<(outs IntRegs:$dst), @@ -696,6 +696,9 @@ class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC, let accessSize = AccessSz; let BaseOpcode = BaseOp#"_AbsSet"; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); + let IClass = 0b1010; let Inst{27-24} = 0b1011; @@ -750,7 +753,7 @@ let mayStore = 1, addrMode = AbsoluteSet in { } let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm", -addrMode = BaseLongOffset, AddedComplexity = 40 in + addrMode = BaseLongOffset, AddedComplexity = 40 in class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC, bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> : STInst<(outs), @@ -766,6 +769,10 @@ class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC, let accessSize = AccessSz; let CextOpcode = CextOp; let BaseOpcode = CextOp#"_shl"; + + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); + let IClass = 0b1010; let Inst{27-24} =0b1101; @@ -856,6 +863,9 @@ class T_store_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isH> bits<2> u2; bits<5> Rt; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); + let IClass = 0b0011; let Inst{27-24} = 0b1011; @@ -888,6 +898,8 @@ class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, let isPredicatedFalse = isNot; let isPredicatedNew = isPredNew; + // Store upper-half and store doubleword cannot be NV. 
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); let IClass = 0b0011; @@ -1826,43 +1838,22 @@ def: LogLogNot_pat<or, or, C4_or_orn>; // below are needed to support code generation for PIC //===----------------------------------------------------------------------===// -def SDT_HexagonPICAdd +def SDT_HexagonAtGot + : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_HexagonAtPcrel : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def SDT_HexagonGOTAdd - : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; - -def SDT_HexagonGOTAddInternal : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; -def SDT_HexagonGOTAddInternalJT : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; -def SDT_HexagonGOTAddInternalBA : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; - -def Hexagonpic_add : SDNode<"HexagonISD::PIC_ADD", SDT_HexagonPICAdd>; -def Hexagonat_got : SDNode<"HexagonISD::AT_GOT", SDT_HexagonGOTAdd>; -def Hexagongat_pcrel : SDNode<"HexagonISD::AT_PCREL", - SDT_HexagonGOTAddInternal>; -def Hexagongat_pcrel_jt : SDNode<"HexagonISD::AT_PCREL", - SDT_HexagonGOTAddInternalJT>; -def Hexagongat_pcrel_ba : SDNode<"HexagonISD::AT_PCREL", - SDT_HexagonGOTAddInternalBA>; - -// PIC: Map from a block address computation to a PC-relative add -def: Pat<(Hexagongat_pcrel_ba tblockaddress:$src1), - (C4_addipc u32ImmPred:$src1)>; - -// PIC: Map from the computation to generate a GOT pointer to a PC-relative add -def: Pat<(Hexagonpic_add texternalsym:$src1), - (C4_addipc u32ImmPred:$src1)>; -// PIC: Map from a jump table address computation to a PC-relative add -def: Pat<(Hexagongat_pcrel_jt tjumptable:$src1), - (C4_addipc u32ImmPred:$src1)>; +// AT_GOT address-of-GOT, address-of-global, offset-in-global +def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; +// AT_PCREL address-of-global +def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; -// PIC: Map from a GOT-relative symbol reference to a load -def: Pat<(Hexagonat_got (i32 IntRegs:$src1), tglobaladdr:$src2), - (L2_loadri_io IntRegs:$src1, s30_2ImmPred:$src2)>; - -// PIC: Map from a static symbol reference to a PC-relative add -def: Pat<(Hexagongat_pcrel tglobaladdr:$src1), - (C4_addipc u32ImmPred:$src1)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), + (L2_loadri_io I32:$got, imm:$addr)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), + (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; +def: Pat<(HexagonAtPcrel I32:$addr), + (C4_addipc imm:$addr)>; //===----------------------------------------------------------------------===// // CR - @@ -1903,7 +1894,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6), "$Rd = add($Rs, add($Ru, #$s6))" , [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), - (add (i32 IntRegs:$Ru), s16_16ImmPred:$s6)))], + (add (i32 IntRegs:$Ru), s32ImmPred:$s6)))], "", ALU64_tc_2_SLOT23> { bits<5> Rd; bits<5> Rs; @@ -3290,27 +3281,33 @@ defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel; let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in { def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">; + let isExtended = 1, opExtendable = 0 in + def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">; } // Restore registers and dealloc frame before a tail call. 
 let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in {
   def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel;
+  let isExtended = 1, opExtendable = 0 in
+  def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<"">, PredRel;
 }

 // Save registers for function call.
 let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
   def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel;
+  let isExtended = 1, opExtendable = 0 in
+  def SAVE_REGISTERS_CALL_V4_EXT : T_Call<"">, PredRel;
 }

 //===----------------------------------------------------------------------===//
 // Template class for non-predicated store instructions with
 // GP-Relative or absolute addressing.
 //===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicable = 1, isNVStorable = 1 in
+let hasSideEffects = 0, isPredicable = 1 in
 class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
-                    bits<2>MajOp, Operand AddrOp, bit isAbs, bit isHalf>
-  : STInst<(outs), (ins AddrOp:$addr, RC:$src),
-      mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src"#!if(isHalf, ".h",""),
+                    bits<2>MajOp, bit isAbs, bit isHalf>
+  : STInst<(outs), (ins ImmOp:$addr, RC:$src),
+      mnemonic # "(#$addr) = $src"#!if(isHalf, ".h",""),
       [], "", V2LDST_tc_st_SLOT01> {
     bits<19> addr;
     bits<5> src;
@@ -3321,6 +3318,9 @@ class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
                     !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
                     !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
                                      /* u16_0Imm */ addr{15-0})));
+    // Store upper-half and store doubleword cannot be NV.
+    let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+
     let IClass = 0b0100;
     let Inst{27} = 1;
     let Inst{26-25} = offsetBits{15-14};
@@ -3337,11 +3337,10 @@ class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
 // Template class for predicated store instructions with
 // GP-Relative or absolute addressing.
 //===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicated = 1, isNVStorable = 1, opExtentBits = 6,
-    opExtendable = 1 in
+let hasSideEffects = 0, isPredicated = 1, opExtentBits = 6, opExtendable = 1 in
 class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
                        bit isHalf, bit isNot, bit isNew>
-  : STInst<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, RC: $src2),
+  : STInst<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, RC: $src2),
       !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
       ") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""),
       [], "", ST_tc_st_SLOT01>, AddrModeRel {
@@ -3351,6 +3350,8 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
     let isPredicatedNew = isNew;
     let isPredicatedFalse = isNot;

+    // Store upper-half and store doubleword cannot be NV.
+    let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));

     let IClass = 0b1010;
@@ -3371,7 +3372,7 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
 //===----------------------------------------------------------------------===//
 class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
                   bits<2> MajOp, bit isHalf>
-  : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1, isHalf>,
+  : T_StoreAbsGP <mnemonic, RC, u32MustExt, MajOp, 1, isHalf>,
                  AddrModeRel {
   string ImmOpStr = !cast<string>(ImmOp);
   let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3538,7 +3539,7 @@ defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>;
 let isAsmParserOnly = 1 in
 class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC,
                  Operand ImmOp, bits<2> MajOp, bit isHalf = 0>
-  : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0, isHalf> {
+  : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, 0, isHalf> {
   // Set BaseOpcode same as absolute addressing instructions so that
   // non-predicated GP-Rel instructions can be related to predicated
   // Absolute instructions.
@@ -3553,7 +3554,7 @@ multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp,
   // Absolute instructions.
   let BaseOpcode = BaseOp#_abs in {
     def NAME#gp : T_StoreAbsGP <mnemonic, IntRegs, ImmOp, MajOp,
-                                globaladdress, 0, isHalf>;
+                                0, isHalf>;
     // New-value store
     def NAME#newgp : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 0> ;
   }
@@ -3615,9 +3616,9 @@ let AddedComplexity = 100 in {
 //===----------------------------------------------------------------------===//
 let isPredicable = 1, hasSideEffects = 0 in
 class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
-                   bits<3> MajOp, Operand AddrOp, bit isAbs>
-  : LDInst <(outs RC:$dst), (ins AddrOp:$addr),
-            "$dst = "#mnemonic# !if(isAbs, "(##", "(#")#"$addr)",
+                   bits<3> MajOp>
+  : LDInst <(outs RC:$dst), (ins ImmOp:$addr),
+            "$dst = "#mnemonic# "(#$addr)",
             [], "", V2LDST_tc_ld_SLOT01> {
     bits<5> dst;
     bits<19> addr;
@@ -3642,7 +3643,7 @@ class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,

 class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
                  bits<3> MajOp>
-  : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1>, AddrModeRel {
+  : T_LoadAbsGP <mnemonic, RC, u32MustExt, MajOp>, AddrModeRel {
   string ImmOpStr = !cast<string>(ImmOp);
   let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3660,10 +3661,11 @@ class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
 // Template class for predicated load instructions with
 // absolute addressing mode.
//===----------------------------------------------------------------------===// -let isPredicated = 1, opExtentBits = 6, opExtendable = 2 in +let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opExtentBits = 6, + opExtendable = 2 in class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isPredNot, bit isPredNew> - : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u6Ext:$absaddr), + : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32MustExt:$absaddr), !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel { bits<5> dst; @@ -3737,7 +3739,7 @@ defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; let isAsmParserOnly = 1 in class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp, bits<3> MajOp> - : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0>, PredNewRel { + : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp>, PredNewRel { let BaseOpcode = BaseOp#_abs; } @@ -3841,26 +3843,6 @@ let AddedComplexity = 100 in { def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>; } -// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd -let AddedComplexity = 100 in -def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>; - -// Transfer global address into a register -let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1, -isAsCheapAsAMove = 1, isReMaterializable = 1, isCodeGenOnly = 1 in -def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), - "$dst = #$src1", - [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>; - -// Transfer a block address into a register -def : Pat<(HexagonCONST32_GP tblockaddress:$src1), - (TFRI_V4 tblockaddress:$src1)>; - -let AddedComplexity = 50 in -def : Pat<(HexagonCONST32_GP tglobaladdr:$src1), - (TFRI_V4 tglobaladdr:$src1)>; - // i8/i16/i32 -> i64 loads // We need a complexity of 120 here to override preceding handling of // zextload. diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td index 337f4ea2184a..823961fb6e6f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td @@ -98,21 +98,21 @@ def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1), // HexagonInstrInfo.td patterns. 
let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1, isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT, - isCodeGenOnly = 1 in + isCodeGenOnly = 1, isPseudo = 1 in def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1), "$dst = #$src1", [(set F32:$dst, fpimm:$src1)]>, Requires<[HasV5T]>; -let isExtended = 1, opExtendable = 2, isPredicated = 1, - hasSideEffects = 0, validSubTargets = HasV5SubT, isCodeGenOnly = 1 in +let isExtended = 1, opExtendable = 2, isPredicated = 1, hasSideEffects = 0, + validSubTargets = HasV5SubT, isCodeGenOnly = 1, isPseudo = 1 in def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, f32Ext:$src2), "if ($src1) $dst = #$src2", []>, Requires<[HasV5T]>; -let isPseudo = 1, isExtended = 1, opExtendable = 2, isPredicated = 1, - isPredicatedFalse = 1, hasSideEffects = 0, validSubTargets = HasV5SubT in +let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1, + hasSideEffects = 0, validSubTargets = HasV5SubT, isPseudo = 1 in def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, f32Ext:$src2), "if (!$src1) $dst = #$src2", []>, diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td new file mode 100644 index 000000000000..897ada081534 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td @@ -0,0 +1,2241 @@ +//=- HexagonInstrInfoV60.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 instructions in TableGen format. 
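+// (V60 adds the HVX vector coprocessor, which operates in a 64-byte or a
+// 128-byte vector mode; both register sets are described here.)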
+//
+//===----------------------------------------------------------------------===//
+
+
+// Vector store
+let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+{
+  class VSTInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                string cstr = "", InstrItinClass itin = CVI_VM_ST,
+                IType type = TypeCVI_VM_ST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>, OpcodeHexagon;
+
+}
+
+// Vector load
+let Predicates = [HasV60T, UseHVX] in
+let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+  class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                  string cstr = "", InstrItinClass itin = CVI_VM_LD,
+                  IType type = TypeCVI_VM_LD>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+let Predicates = [HasV60T, UseHVX] in
+let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                string cstr = "", InstrItinClass itin = CVI_VM_ST,
+                IType type = TypeCVI_VM_ST>
+: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+//===----------------------------------------------------------------------===//
+// Vector loads with base + immediate offset
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, accessSize = Vector64Access in
+class T_vload_ai<string asmStr>
+  : V6_LDInst <(outs VectorRegs:$dst), (ins IntRegs:$src1, s4_6Imm:$src2),
+               asmStr>;
+
+let isCodeGenOnly = 1, addrMode = BaseImmOffset, accessSize = Vector128Access in
+class T_vload_ai_128B<string asmStr>
+  : V6_LDInst <(outs VectorRegs128B:$dst), (ins IntRegs:$src1, s4_7Imm:$src2),
+               asmStr>;
+
+let isCVLoadable = 1, hasNewValue = 1 in {
+  def V6_vL32b_ai    : T_vload_ai <"$dst = vmem($src1+#$src2)">,
+                       V6_vL32b_ai_enc;
+  def V6_vL32b_nt_ai : T_vload_ai <"$dst = vmem($src1+#$src2):nt">,
+                       V6_vL32b_nt_ai_enc;
+  // 128B
+  def V6_vL32b_ai_128B    : T_vload_ai_128B <"$dst = vmem($src1+#$src2)">,
+                            V6_vL32b_ai_128B_enc;
+  def V6_vL32b_nt_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2):nt">,
+                            V6_vL32b_nt_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU, hasNewValue = 1 in {
+  def V6_vL32Ub_ai      : T_vload_ai <"$dst = vmemu($src1+#$src2)">,
+                          V6_vL32Ub_ai_enc;
+  def V6_vL32Ub_ai_128B : T_vload_ai_128B <"$dst = vmemu($src1+#$src2)">,
+                          V6_vL32Ub_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD, isCVLoad = 1,
+    hasNewValue = 1 in {
+  def V6_vL32b_cur_ai    : T_vload_ai <"$dst.cur = vmem($src1+#$src2)">,
+                           V6_vL32b_cur_ai_enc;
+  def V6_vL32b_nt_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2):nt">,
+                           V6_vL32b_nt_cur_ai_enc;
+  // 128B
+  def V6_vL32b_cur_ai_128B    : T_vload_ai_128B
+                                <"$dst.cur = vmem($src1+#$src2)">,
+                                V6_vL32b_cur_ai_128B_enc;
+  def V6_vL32b_nt_cur_ai_128B : T_vload_ai_128B
+                                <"$dst.cur = vmem($src1+#$src2):nt">,
+                                V6_vL32b_nt_cur_ai_128B_enc;
+}
+
+
+let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD, hasNewValue = 1 in {
+  def V6_vL32b_tmp_ai    : T_vload_ai <"$dst.tmp = vmem($src1+#$src2)">,
+                           V6_vL32b_tmp_ai_enc;
+  def V6_vL32b_nt_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2):nt">,
+                           V6_vL32b_nt_tmp_ai_enc;
+  // 128B
+  def V6_vL32b_tmp_ai_128B    : T_vload_ai_128B
+                                <"$dst.tmp = vmem($src1+#$src2)">,
+                                V6_vL32b_tmp_ai_128B_enc;
+  def V6_vL32b_nt_tmp_ai_128B : T_vload_ai_128B
+                                <"$dst.tmp = vmem($src1+#$src2):nt">,
+                                V6_vL32b_nt_tmp_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector stores with
base + immediate offset - unconditional +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, accessSize = Vector64Access in +class T_vstore_ai <string mnemonic, string baseOp, Operand ImmOp, + RegisterClass RC, bit isNT> + : V6_STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_ai_64B <string mnemonic, string baseOp, bit isNT = 0> + : T_vstore_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_ai_128B <string mnemonic, string baseOp, bit isNT = 0> + : T_vstore_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>; + +let isNVStorable = 1 in { + def V6_vS32b_ai : T_vstore_ai_64B <"vmem", "vS32b_ai">, + V6_vS32b_ai_enc; + def V6_vS32b_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai">, + V6_vS32b_ai_128B_enc; +} + +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_ai : T_vstore_ai_64B <"vmem", "vS32b_ai", 1>, + V6_vS32b_nt_ai_enc; + def V6_vS32b_nt_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai", 1>, + V6_vS32b_nt_ai_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vs32Ub_ai">, + V6_vS32Ub_ai_enc; + def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vs32Ub_ai">, + V6_vS32Ub_ai_128B_enc; +} +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - unconditional new +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isNewValue = 1, opNewValue = 2, isNVStore = 1, + Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in +class T_vstore_new_ai <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT> + : V6_STInst <(outs ), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + "vmem($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3.new">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_ai_64B <string baseOp, bit isNT = 0> + : T_vstore_new_ai <baseOp, s4_6Imm, VectorRegs, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_ai_128B <string baseOp, bit isNT = 0> + : T_vstore_new_ai <baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>; + +def V6_vS32b_new_ai : T_vstore_new_ai_64B <"vS32b_ai">, V6_vS32b_new_ai_enc; +def V6_vS32b_new_ai_128B : T_vstore_new_ai_128B <"vS32b_ai">, + V6_vS32b_new_ai_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_ai : T_vstore_new_ai_64B<"vS32b_ai", 1>, + V6_vS32b_nt_new_ai_enc; + def V6_vS32b_nt_new_ai_128B : T_vstore_new_ai_128B<"vS32b_ai", 1>, + V6_vS32b_nt_new_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - conditional +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isPredicated = 1 in +class T_vstore_pred_ai <string mnemonic, string baseOp, Operand ImmOp, + RegisterClass RC, bit isPredNot = 0, bit isNT = 0> + : V6_STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) " + #mnemonic#"($src2+#$src3)"#!if(isNT, ":nt", "")#" = $src4">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access 
in +class T_vstore_pred_ai_64B <string mnemonic, string baseOp, + bit isPredNot = 0, bit isNT = 0> + : T_vstore_pred_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pred_ai_128B <string mnemonic, string baseOp, + bit isPredNot = 0, bit isNT = 0> + : T_vstore_pred_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B, + isPredNot, isNT>; + +let isNVStorable = 1 in { + def V6_vS32b_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai">, + V6_vS32b_pred_ai_enc; + def V6_vS32b_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1>, + V6_vS32b_npred_ai_enc; + // 128B + def V6_vS32b_pred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai">, + V6_vS32b_pred_ai_128B_enc; + def V6_vS32b_npred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai", 1>, + V6_vS32b_npred_ai_128B_enc; +} +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 0, 1>, + V6_vS32b_nt_pred_ai_enc; + def V6_vS32b_nt_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1, 1>, + V6_vS32b_nt_npred_ai_enc; + // 128B + def V6_vS32b_nt_pred_ai_128B : T_vstore_pred_ai_128B + <"vmem", "vS32b_ai", 0, 1>, + V6_vS32b_nt_pred_ai_128B_enc; + def V6_vS32b_nt_npred_ai_128B : T_vstore_pred_ai_128B + <"vmem", "vS32b_ai", 1, 1>, + V6_vS32b_nt_npred_ai_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai">, + V6_vS32Ub_pred_ai_enc; + def V6_vS32Ub_npred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai", 1>, + V6_vS32Ub_npred_ai_enc; + // 128B + def V6_vS32Ub_pred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai">, + V6_vS32Ub_pred_ai_128B_enc; + def V6_vS32Ub_npred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai", 1>, + V6_vS32Ub_npred_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - byte-enabled aligned +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset in +class T_vstore_qpred_ai <Operand ImmOp, RegisterClass RC, + bit isPredNot = 0, bit isNT = 0> + : V6_STInst <(outs), + (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)" + #!if(isNT, ":nt", "")#" = $src4"> { + let isPredicatedFalse = isPredNot; +} + +let accessSize = Vector64Access in +class T_vstore_qpred_ai_64B <bit isPredNot = 0, bit isNT = 0> + : T_vstore_qpred_ai <s4_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_qpred_ai_128B <bit isPredNot = 0, bit isNT = 0> + : T_vstore_qpred_ai <s4_7Imm, VectorRegs128B, isPredNot, isNT>; + +def V6_vS32b_qpred_ai : T_vstore_qpred_ai_64B, V6_vS32b_qpred_ai_enc; +def V6_vS32b_nqpred_ai : T_vstore_qpred_ai_64B <1>, + V6_vS32b_nqpred_ai_enc; +def V6_vS32b_nt_qpred_ai : T_vstore_qpred_ai_64B <0, 1>, + V6_vS32b_nt_qpred_ai_enc; +def V6_vS32b_nt_nqpred_ai : T_vstore_qpred_ai_64B <1, 1>, + V6_vS32b_nt_nqpred_ai_enc; +// 128B +def V6_vS32b_qpred_ai_128B : T_vstore_qpred_ai_128B, V6_vS32b_qpred_ai_128B_enc; +def V6_vS32b_nqpred_ai_128B : T_vstore_qpred_ai_128B<1>, + V6_vS32b_nqpred_ai_128B_enc; +def V6_vS32b_nt_qpred_ai_128B : T_vstore_qpred_ai_128B<0, 1>, + V6_vS32b_nt_qpred_ai_128B_enc; +def V6_vS32b_nt_nqpred_ai_128B : T_vstore_qpred_ai_128B<1, 1>, + V6_vS32b_nt_nqpred_ai_128B_enc; + + 
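+// Note: the "qpred" stores above are byte-enabled: with a vector predicate
+// Qv, "if (Qv) vmem(Rt+#s4) = Vs" writes only the byte lanes whose enable
+// bit is set in Qv and leaves the other bytes of the line unchanged.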
+//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - conditional new +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isPredicated = 1, isNewValue = 1, opNewValue = 3, + isNVStore = 1, Type = TypeCVI_VM_NEW_ST, Itinerary = CVI_VM_NEW_ST in +class T_vstore_new_pred_ai <string baseOp, Operand ImmOp, RegisterClass RC, + bit isPredNot, bit isNT> + : V6_STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)" + #!if(isNT, ":nt", "")#" = $src4.new">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pred_ai_64B <string baseOp, bit isPredNot = 0, bit isNT = 0> + : T_vstore_new_pred_ai <baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pred_ai_128B <string baseOp, bit isPredNot = 0, bit isNT = 0> + : T_vstore_new_pred_ai <baseOp#"128B", s4_7Imm, VectorRegs128B, + isPredNot, isNT>; + + +def V6_vS32b_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai">, + V6_vS32b_new_pred_ai_enc; +def V6_vS32b_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1>, + V6_vS32b_new_npred_ai_enc; +// 128B +def V6_vS32b_new_pred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai">, + V6_vS32b_new_pred_ai_128B_enc; +def V6_vS32b_new_npred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai", 1>, + V6_vS32b_new_npred_ai_128B_enc; +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 0, 1>, + V6_vS32b_nt_new_pred_ai_enc; + def V6_vS32b_nt_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1, 1>, + V6_vS32b_nt_new_npred_ai_enc; + // 128B + def V6_vS32b_nt_new_pred_ai_128B : T_vstore_new_pred_ai_128B + <"vS32b_ai", 0, 1>, + V6_vS32b_nt_new_pred_ai_128B_enc; + def V6_vS32b_nt_new_npred_ai_128B : T_vstore_new_pred_ai_128B + <"vS32b_ai", 1, 1>, + V6_vS32b_nt_new_npred_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector loads with immediate offset. 
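+// (The base register is written back, advanced by #imm scaled by the
+// vector width; the "$src1 = $_dst_" constraint ties the write-back to it.)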
+//===----------------------------------------------------------------------===// +let addrMode = PostInc, hasNewValue = 1 in +class T_vload_pi<string asmStr, Operand ImmOp, RegisterClass RC> + : V6_LDInst <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2), asmStr, [], + "$src1 = $_dst_">; + +let accessSize = Vector64Access in +class T_vload_pi_64B <string asmStr> + : T_vload_pi <asmStr, s3_6Imm, VectorRegs>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vload_pi_128B <string asmStr> + : T_vload_pi <asmStr, s3_7Imm, VectorRegs128B>; + +let isCVLoadable = 1 in { + def V6_vL32b_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2)">, + V6_vL32b_pi_enc; + def V6_vL32b_nt_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2):nt">, + V6_vL32b_nt_pi_enc; + // 128B + def V6_vL32b_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2)">, + V6_vL32b_pi_128B_enc; + def V6_vL32b_nt_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2):nt">, + V6_vL32b_nt_pi_128B_enc; +} + +let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in { + def V6_vL32Ub_pi : T_vload_pi_64B <"$dst = vmemu($src1++#$src2)">, + V6_vL32Ub_pi_enc; + // 128B + def V6_vL32Ub_pi_128B : T_vload_pi_128B <"$dst = vmemu($src1++#$src2)">, + V6_vL32Ub_pi_128B_enc; +} + +let isCVLoad = 1, Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD in { + def V6_vL32b_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2)">, + V6_vL32b_cur_pi_enc; + def V6_vL32b_nt_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2):nt">, + V6_vL32b_nt_cur_pi_enc; + // 128B + def V6_vL32b_cur_pi_128B : T_vload_pi_128B + <"$dst.cur = vmem($src1++#$src2)">, + V6_vL32b_cur_pi_128B_enc; + def V6_vL32b_nt_cur_pi_128B : T_vload_pi_128B + <"$dst.cur = vmem($src1++#$src2):nt">, + V6_vL32b_nt_cur_pi_128B_enc; +} + +let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in { + def V6_vL32b_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2)">, + V6_vL32b_tmp_pi_enc; + def V6_vL32b_nt_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2):nt">, + V6_vL32b_nt_tmp_pi_enc; + //128B + def V6_vL32b_tmp_pi_128B : T_vload_pi_128B + <"$dst.tmp = vmem($src1++#$src2)">, + V6_vL32b_tmp_pi_128B_enc; + def V6_vL32b_nt_tmp_pi_128B : T_vload_pi_128B + <"$dst.tmp = vmem($src1++#$src2):nt">, + V6_vL32b_nt_tmp_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector stores with immediate offset. 
+//===----------------------------------------------------------------------===// +let addrMode = PostInc in +class T_vstore_pi <string mnemonic, string baseOp, Operand ImmOp, + RegisterClass RC, bit isNT> + : V6_STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3", [], + "$src1 = $_dst_">, NewValueRel; + +let accessSize = Vector64Access in +class T_vstore_pi_64B <string mnemonic, string baseOp, bit isNT = 0> + : T_vstore_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pi_128B <string mnemonic, string baseOp, bit isNT = 0> + : T_vstore_pi <mnemonic, baseOp, s3_7Imm, VectorRegs128B, isNT>; + +let isNVStorable = 1 in { + def V6_vS32b_pi : T_vstore_pi_64B <"vmem", "vS32b_pi">, V6_vS32b_pi_enc; + def V6_vS32b_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi">, + V6_vS32b_pi_128B_enc; +} + +let isNVStorable = 1 , isNonTemporal = 1 in { + def V6_vS32b_nt_pi : T_vstore_pi_64B <"vmem", "vS32b_pi", 1>, + V6_vS32b_nt_pi_enc; + def V6_vS32b_nt_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi", 1>, + V6_vS32b_nt_pi_128B_enc; +} + + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pi : T_vstore_pi_64B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pi_enc; + def V6_vS32Ub_pi_128B : T_vstore_pi_128B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment unconditional .new vector stores with immediate offset. +//===----------------------------------------------------------------------===// +let addrMode = PostInc, isNVStore = 1 in +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1, + opNewValue = 3, isNVStore = 1 in +class T_vstore_new_pi <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT> + : V6_STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + "vmem($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [], + "$src1 = $_dst_">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pi_64B <string baseOp, bit isNT = 0> + : T_vstore_new_pi <baseOp, s3_6Imm, VectorRegs, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pi_128B <string baseOp, bit isNT = 0> + : T_vstore_new_pi <baseOp#"128B", s3_7Imm, VectorRegs128B, isNT>; + + +def V6_vS32b_new_pi : T_vstore_new_pi_64B <"vS32b_pi">, + V6_vS32b_new_pi_enc; +def V6_vS32b_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi">, + V6_vS32b_new_pi_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_pi : T_vstore_new_pi_64B <"vS32b_pi", 1>, + V6_vS32b_nt_new_pi_enc; + def V6_vS32b_nt_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi", 1>, + V6_vS32b_nt_new_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment conditional vector stores with immediate offset +//===----------------------------------------------------------------------===// +let isPredicated = 1, addrMode = PostInc in +class T_vstore_pred_pi <string mnemonic, string baseOp, Operand ImmOp, + RegisterClass RC, bit isPredNot, bit isNT> + : V6_STInst<(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + 
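+// 64B and 128B specializations of the predicated post-increment store;
+// e.g. T_vstore_pred_pi_64B<"vmem", "vS32b_pi", 1, 1> (used below) yields
+// the "if (!Pv) vmem(Rx++#s3):nt = Vs" form.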
+let accessSize = Vector64Access in +class T_vstore_pred_pi_64B <string mnemonic, string baseOp, + bit isPredNot = 0, bit isNT = 0> + : T_vstore_pred_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pred_pi_128B <string mnemonic, string baseOp, + bit isPredNot = 0, bit isNT = 0> + : T_vstore_pred_pi <mnemonic, baseOp#"128B", s3_7Imm, VectorRegs128B, + isPredNot, isNT>; + +let isNVStorable = 1 in { + def V6_vS32b_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi">, + V6_vS32b_pred_pi_enc; + def V6_vS32b_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1>, + V6_vS32b_npred_pi_enc; + // 128B + def V6_vS32b_pred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi">, + V6_vS32b_pred_pi_128B_enc; + def V6_vS32b_npred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi", 1>, + V6_vS32b_npred_pi_128B_enc; +} +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 0, 1>, + V6_vS32b_nt_pred_pi_enc; + def V6_vS32b_nt_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1, 1>, + V6_vS32b_nt_npred_pi_enc; + // 128B + def V6_vS32b_nt_pred_pi_128B : T_vstore_pred_pi_128B + <"vmem", "vS32b_pi", 0, 1>, + V6_vS32b_nt_pred_pi_128B_enc; + def V6_vS32b_nt_npred_pi_128B : T_vstore_pred_pi_128B + <"vmem", "vS32b_pi", 1, 1>, + V6_vS32b_nt_npred_pi_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pred_pi_enc; + def V6_vS32Ub_npred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi", 1>, + V6_vS32Ub_npred_pi_enc; + // 128B + def V6_vS32Ub_pred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pred_pi_128B_enc; + def V6_vS32Ub_npred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi", 1>, + V6_vS32Ub_npred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector stores with immediate offset - byte-enabled aligned +//===----------------------------------------------------------------------===// +let addrMode = PostInc in +class T_vstore_qpred_pi <Operand ImmOp, RegisterClass RC, bit isPredNot = 0, + bit isNT = 0> + : V6_STInst <(outs IntRegs:$_dst_), + (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">; + +let accessSize = Vector64Access in +class T_vstore_qpred_pi_64B <bit isPredNot = 0, bit isNT = 0> + : T_vstore_qpred_pi <s3_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_qpred_pi_128B <bit isPredNot = 0, bit isNT = 0> + : T_vstore_qpred_pi <s3_7Imm, VectorRegs128B, isPredNot, isNT>; + +def V6_vS32b_qpred_pi : T_vstore_qpred_pi_64B, V6_vS32b_qpred_pi_enc; +def V6_vS32b_nqpred_pi : T_vstore_qpred_pi_64B <1>, V6_vS32b_nqpred_pi_enc; +// 128B +def V6_vS32b_qpred_pi_128B : T_vstore_qpred_pi_128B, + V6_vS32b_qpred_pi_128B_enc; +def V6_vS32b_nqpred_pi_128B : T_vstore_qpred_pi_128B<1>, + V6_vS32b_nqpred_pi_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_qpred_pi : T_vstore_qpred_pi_64B <0, 1>, + V6_vS32b_nt_qpred_pi_enc; + def V6_vS32b_nt_nqpred_pi : T_vstore_qpred_pi_64B <1, 1>, + V6_vS32b_nt_nqpred_pi_enc; + // 128B + def V6_vS32b_nt_qpred_pi_128B : T_vstore_qpred_pi_128B<0, 1>, + V6_vS32b_nt_qpred_pi_128B_enc; + def V6_vS32b_nt_nqpred_pi_128B : T_vstore_qpred_pi_128B<1, 1>, + 
V6_vS32b_nt_nqpred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment conditional .new vector stores with immediate offset +//===----------------------------------------------------------------------===// +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1, + isNewValue = 1, opNewValue = 4, addrMode = PostInc, isNVStore = 1 in +class T_vstore_new_pred_pi <string baseOp, Operand ImmOp, RegisterClass RC, + bit isPredNot, bit isNT> + : V6_STInst <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4.new", [], + "$src2 = $_dst_"> , NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pred_pi_64B <string baseOp, bit isPredNot = 0, bit isNT = 0> + : T_vstore_new_pred_pi <baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pred_pi_128B <string baseOp, bit isPredNot = 0, bit isNT = 0> + : T_vstore_new_pred_pi <baseOp#"128B", s3_7Imm, VectorRegs128B, + isPredNot, isNT>; + +def V6_vS32b_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi">, + V6_vS32b_new_pred_pi_enc; +def V6_vS32b_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1>, + V6_vS32b_new_npred_pi_enc; +// 128B +def V6_vS32b_new_pred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi">, + V6_vS32b_new_pred_pi_128B_enc; +def V6_vS32b_new_npred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi", 1>, + V6_vS32b_new_npred_pi_128B_enc; +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 0, 1>, + V6_vS32b_nt_new_pred_pi_enc; + def V6_vS32b_nt_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1, 1>, + V6_vS32b_nt_new_npred_pi_enc; + // 128B + def V6_vS32b_nt_new_pred_pi_128B : T_vstore_new_pred_pi_128B + <"vS32b_pi", 0, 1>, + V6_vS32b_nt_new_pred_pi_128B_enc; + def V6_vS32b_nt_new_npred_pi_128B : T_vstore_new_pred_pi_128B + <"vS32b_pi", 1, 1>, + V6_vS32b_nt_new_npred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector loads with register offset +//===----------------------------------------------------------------------===// +let hasNewValue = 1 in +class T_vload_ppu<string asmStr> + : V6_LDInst <(outs VectorRegs:$dst, IntRegs:$_dst_), + (ins IntRegs:$src1, ModRegs:$src2), asmStr, [], + "$src1 = $_dst_">, NewValueRel; + +let isCVLoadable = 1 in { + def V6_vL32b_ppu : T_vload_ppu <"$dst = vmem($src1++$src2)">, + V6_vL32b_ppu_enc; + def V6_vL32b_nt_ppu : T_vload_ppu <"$dst = vmem($src1++$src2):nt">, + V6_vL32b_nt_ppu_enc; +} + +let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in +def V6_vL32Ub_ppu : T_vload_ppu <"$dst = vmemu($src1++$src2)">, + V6_vL32Ub_ppu_enc; + +let isCVLoad = 1, Itinerary = CVI_VM_CUR_LD, Type = TypeCVI_VM_CUR_LD in { + def V6_vL32b_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2)">, + V6_vL32b_cur_ppu_enc; + def V6_vL32b_nt_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2):nt">, + V6_vL32b_nt_cur_ppu_enc; +} + +let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in { + def V6_vL32b_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2)">, + V6_vL32b_tmp_ppu_enc; + def V6_vL32b_nt_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2):nt">, + V6_vL32b_nt_tmp_ppu_enc; +} + 
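+// Note: in the "ppu" forms the post-increment comes from a modifier
+// register (M0/M1), so "vmem($src1++$src2)" accesses memory at $src1 and
+// then advances $src1 by the byte count held in $src2.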
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset
+//===----------------------------------------------------------------------===//
+class T_vstore_ppu <string mnemonic, bit isNT = 0>
+  : V6_STInst <(outs IntRegs:$_dst_),
+      (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+      mnemonic#"($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
+      "$src1 = $_dst_">, NewValueRel;
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+  def V6_vS32b_ppu : T_vstore_ppu <"vmem">,
+                     V6_vS32b_ppu_enc;
+  let isNonTemporal = 1, BaseOpcode = "vS32b_ppu" in
+  def V6_vS32b_nt_ppu : T_vstore_ppu <"vmem", 1>,
+                        V6_vS32b_nt_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in
+def V6_vS32Ub_ppu : T_vstore_ppu <"vmemu">, V6_vS32Ub_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment .new vector stores with register offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
+    opNewValue = 3, isNVStore = 1 in
+class T_vstore_new_ppu <bit isNT = 0>
+  : V6_STInst <(outs IntRegs:$_dst_),
+      (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+      "vmem($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
+      "$src1 = $_dst_">, NewValueRel;
+
+let BaseOpcode = "vS32b_ppu" in
+def V6_vS32b_new_ppu : T_vstore_new_ppu, V6_vS32b_new_ppu_enc;
+
+let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in
+def V6_vS32b_nt_new_ppu : T_vstore_new_ppu<1>, V6_vS32b_nt_new_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional vector stores with register offset
+//===----------------------------------------------------------------------===//
+let isPredicated = 1 in
+class T_vstore_pred_ppu <string mnemonic, bit isPredNot = 0, bit isNT = 0>
+  : V6_STInst<(outs IntRegs:$_dst_),
+      (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+      "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++$src3)"
+      #!if(isNT, ":nt", "")#" = $src4", [],
+      "$src2 = $_dst_">, NewValueRel {
+  let isPredicatedFalse = isPredNot;
+}
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+  def V6_vS32b_pred_ppu : T_vstore_pred_ppu<"vmem">, V6_vS32b_pred_ppu_enc;
+  def V6_vS32b_npred_ppu: T_vstore_pred_ppu<"vmem", 1>, V6_vS32b_npred_ppu_enc;
+}
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
+  def V6_vS32b_nt_pred_ppu : T_vstore_pred_ppu <"vmem", 0, 1>,
+                             V6_vS32b_nt_pred_ppu_enc;
+  def V6_vS32b_nt_npred_ppu : T_vstore_pred_ppu <"vmem", 1, 1>,
+                              V6_vS32b_nt_npred_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU,
+    Type = TypeCVI_VM_STU in {
+  def V6_vS32Ub_pred_ppu : T_vstore_pred_ppu <"vmemu">,
+                           V6_vS32Ub_pred_ppu_enc;
+  def V6_vS32Ub_npred_ppu : T_vstore_pred_ppu <"vmemu", 1>,
+                            V6_vS32Ub_npred_ppu_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset - byte-enabled aligned
+//===----------------------------------------------------------------------===//
+class T_vstore_qpred_ppu <bit isPredNot = 0, bit isNT = 0>
+  : V6_STInst <(outs IntRegs:$_dst_),
+      (ins VecPredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+      "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
+      #!if(isNT, ":nt", "")#" = $src4", [],
+      "$src2 = $_dst_">,
NewValueRel;
+
+def V6_vS32b_qpred_ppu    : T_vstore_qpred_ppu, V6_vS32b_qpred_ppu_enc;
+def V6_vS32b_nqpred_ppu   : T_vstore_qpred_ppu<1>, V6_vS32b_nqpred_ppu_enc;
+def V6_vS32b_nt_qpred_ppu : T_vstore_qpred_ppu<0, 1>,
+                            V6_vS32b_nt_qpred_ppu_enc;
+def V6_vS32b_nt_nqpred_ppu : T_vstore_qpred_ppu<1, 1>,
+                             V6_vS32b_nt_nqpred_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional .new vector stores with register offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
+    isNewValue = 1, opNewValue = 4, isNVStore = 1 in
+class T_vstore_new_pred_ppu <bit isPredNot = 0, bit isNT = 0>
+  : V6_STInst <(outs IntRegs:$_dst_),
+      (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+      "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
+      #!if(isNT, ":nt", "")#" = $src4.new", [],
+      "$src2 = $_dst_">, NewValueRel {
+  let isPredicatedFalse = isPredNot;
+}
+
+let BaseOpcode = "vS32b_ppu" in {
+  def V6_vS32b_new_pred_ppu : T_vstore_new_pred_ppu,
+                              V6_vS32b_new_pred_ppu_enc;
+  def V6_vS32b_new_npred_ppu : T_vstore_new_pred_ppu<1>,
+                               V6_vS32b_new_npred_ppu_enc;
+}
+
+let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
+def V6_vS32b_nt_new_pred_ppu : T_vstore_new_pred_ppu<0, 1>,
+                               V6_vS32b_nt_new_pred_ppu_enc;
+def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>,
+                                V6_vS32b_nt_new_npred_ppu_enc;
+}
+
+let isPseudo = 1, validSubTargets = HasV60SubT in
+class STrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>:
+  VSTInst<(outs), (ins IntRegs:$addr, ImmOp:$off, RC:$src),
+  #mnemonic#"($addr+#$off) = $src", []>;
+
+def STrivv_indexed: STrivv_template<"vvmem", s4_6Imm, VecDblRegs>,
+  Requires<[HasV60T, UseHVXSgl]>;
+def STrivv_indexed_128B: STrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>,
+  Requires<[HasV60T, UseHVXDbl]>;
+
+multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+  def : Pat<(store (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+            (STrivv_indexed IntRegs:$addr, #0, (VTSgl VecDblRegs:$src1))>,
+        Requires<[UseHVXSgl]>;
+
+  def : Pat<(store (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+            (STrivv_indexed_128B IntRegs:$addr, #0,
+                                 (VTDbl VecDblRegs128B:$src1))>,
+        Requires<[UseHVXDbl]>;
+}
+
+defm : STrivv_pats <v128i8, v256i8>;
+defm : STrivv_pats <v64i16, v128i16>;
+defm : STrivv_pats <v32i32, v64i32>;
+defm : STrivv_pats <v16i64, v32i64>;
+
+
+multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+  // Aligned stores
+  def : Pat<(store (VTSgl VectorRegs:$src1), IntRegs:$addr),
+            (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>,
+        Requires<[UseHVXSgl]>;
+
+  // 128B Aligned stores
+  def : Pat<(store (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+            (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>,
+        Requires<[UseHVXDbl]>;
+
+  // Fold Add R+IMM into vector store.
+  let AddedComplexity = 10 in
+  def : Pat<(store (VTSgl VectorRegs:$src1),
+                   (add IntRegs:$src2, s4_6ImmPred:$offset)),
+            (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
+                         (VTSgl VectorRegs:$src1))>,
+        Requires<[UseHVXSgl]>;
+
+  // Fold Add R+IMM into vector store 128B.
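+  // (AddedComplexity = 10 gives the folded base+offset patterns priority
+  // over the plain register-indirect patterns above during selection.)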
+  let AddedComplexity = 10 in
+  def : Pat<(store (VTDbl VectorRegs128B:$src1),
+                   (add IntRegs:$src2, s4_7ImmPred:$offset)),
+            (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+                              (VTDbl VectorRegs128B:$src1))>,
+        Requires<[UseHVXDbl]>;
+}
+
+defm : vS32b_ai_pats <v64i8, v128i8>;
+defm : vS32b_ai_pats <v32i16, v64i16>;
+defm : vS32b_ai_pats <v16i32, v32i32>;
+defm : vS32b_ai_pats <v8i64, v16i64>;
+
+let isPseudo = 1, validSubTargets = HasV60SubT in
+class LDrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>
+  : V6_LDInst <(outs RC:$dst), (ins IntRegs:$addr, ImmOp:$off),
+               "$dst="#mnemonic#"($addr+#$off)",
+               []>,
+    Requires<[HasV60T,UseHVXSgl]>;
+
+def LDrivv_indexed: LDrivv_template<"vvmem", s4_6Imm, VecDblRegs>;
+def LDrivv_indexed_128B: LDrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>;
+
+multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+  def : Pat < (VTSgl (load IntRegs:$addr)),
+              (LDrivv_indexed IntRegs:$addr, #0) >,
+        Requires<[UseHVXSgl]>;
+
+  def : Pat < (VTDbl (load IntRegs:$addr)),
+              (LDrivv_indexed_128B IntRegs:$addr, #0) >,
+        Requires<[UseHVXDbl]>;
+}
+
+defm : LDrivv_pats <v128i8, v256i8>;
+defm : LDrivv_pats <v64i16, v128i16>;
+defm : LDrivv_pats <v32i32, v64i32>;
+defm : LDrivv_pats <v16i64, v32i64>;
+
+multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+  // Aligned loads
+  def : Pat < (VTSgl (load IntRegs:$addr)),
+              (V6_vL32b_ai IntRegs:$addr, #0) >,
+        Requires<[UseHVXSgl]>;
+
+  // 128B Load
+  def : Pat < (VTDbl (load IntRegs:$addr)),
+              (V6_vL32b_ai_128B IntRegs:$addr, #0) >,
+        Requires<[UseHVXDbl]>;
+
+  // Fold Add R+IMM into vector load.
+  let AddedComplexity = 10 in
+  def : Pat<(VTDbl (load (add IntRegs:$src2, s4_7ImmPred:$offset))),
+            (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+        Requires<[UseHVXDbl]>;
+
+  let AddedComplexity = 10 in
+  def : Pat<(VTSgl (load (add IntRegs:$src2, s4_6ImmPred:$offset))),
+            (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+        Requires<[UseHVXSgl]>;
+}
+
+defm : vL32b_ai_pats <v64i8, v128i8>;
+defm : vL32b_ai_pats <v32i16, v64i16>;
+defm : vL32b_ai_pats <v16i32, v32i32>;
+defm : vL32b_ai_pats <v8i64, v16i64>;
+
+// Store vector predicate pseudo.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+    isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STriq_pred_V6 : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VecPredRegs:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+
+def STriq_pred_vec_V6 : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+
+def STriq_pred_V6_128B : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VecPredRegs128B:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+
+def STriq_pred_vec_V6_128B : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Load vector predicate pseudo.
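+// (These pseudos are expanded before emission; the ".error" asm string
+// ensures they can never survive to the assembler.)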
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDriq_pred_V6 : LDInst<(outs VecPredRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDriq_pred_vec_V6 : LDInst<(outs VectorRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDriq_pred_V6_128B : LDInst<(outs VecPredRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +def LDriq_pred_vec_V6_128B : LDInst<(outs VectorRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +// Store vector pseudo. +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { +def STriv_pseudo_V6 : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def STriv_pseudo_V6_128B : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { +def STrivv_pseudo_V6 : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VecDblRegs:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def STrivv_pseudo_V6_128B : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VecDblRegs128B:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +// Load vector pseudo. 
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDriv_pseudo_V6 : LDInst<(outs VectorRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDriv_pseudo_V6_128B : LDInst<(outs VectorRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDrivv_pseudo_V6 : LDInst<(outs VecDblRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDrivv_pseudo_V6_128B : LDInst<(outs VecDblRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = CVI_VA_DV, + IType type = TypeCVI_VA_DV> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>; + +let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def VSelectPseudo_V6 : VSELInst<(outs VectorRegs:$dst), + (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def VSelectDblPseudo_V6 : VSELInst<(outs VecDblRegs:$dst), + (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +} + +def : Pat <(v16i32 (selectcc (i32 IntRegs:$lhs), (i32 IntRegs:$rhs), + (v16i32 VectorRegs:$tval), + (v16i32 VectorRegs:$fval), SETEQ)), + (v16i32 (VSelectPseudo_V6 (i32 (C2_cmpeq (i32 IntRegs:$lhs), + (i32 IntRegs:$rhs))), + (v16i32 VectorRegs:$tval), + (v16i32 VectorRegs:$fval)))>; + + +let hasNewValue = 1 in +class T_vmpy <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2), + asmString >; + +multiclass T_vmpy <string asmString, RegisterClass RCout, + RegisterClass RCin> { + def NAME : T_vmpy <asmString, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_vmpy <asmString, !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_vmpy_VV <string asmString>: + T_vmpy <asmString, VectorRegs, VectorRegs>; + +multiclass T_vmpy_WW <string asmString>: + T_vmpy <asmString, VecDblRegs, VecDblRegs>; + +multiclass T_vmpy_VW <string asmString>: + T_vmpy <asmString, VectorRegs, VecDblRegs>; + +multiclass T_vmpy_WV <string asmString>: + T_vmpy <asmString, VecDblRegs, VectorRegs>; + +defm V6_vtmpyb :T_vmpy_WW<"$dst.h = vtmpy($src1.b,$src2.b)">, V6_vtmpyb_enc; +defm V6_vtmpybus :T_vmpy_WW<"$dst.h = vtmpy($src1.ub,$src2.b)">, V6_vtmpybus_enc; +defm V6_vdsaduh :T_vmpy_WW<"$dst.uw = vdsad($src1.uh,$src2.uh)">, V6_vdsaduh_enc; +defm V6_vmpybus :T_vmpy_WV<"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybus_enc; +defm V6_vmpabus :T_vmpy_WW<"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabus_enc; +defm V6_vmpahb :T_vmpy_WW<"$dst.w = vmpa($src1.h,$src2.b)">, V6_vmpahb_enc; +defm V6_vmpyh :T_vmpy_WV<"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyh_enc; +defm V6_vmpyuh :T_vmpy_WV<"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuh_enc; +defm V6_vmpyiwh :T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_enc; +defm V6_vtmpyhb :T_vmpy_WW<"$dst.w = 
vtmpy($src1.h,$src2.b)">, V6_vtmpyhb_enc; +defm V6_vmpyub :T_vmpy_WV<"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyub_enc; + +let Itinerary = CVI_VX_LONG, Type = TypeCVI_VX in +defm V6_vmpyihb :T_vmpy_VV<"$dst.h = vmpyi($src1.h,$src2.b)">, V6_vmpyihb_enc; + +defm V6_vdmpybus_dv : + T_vmpy_WW <"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_dv_enc; +defm V6_vdmpyhsusat : + T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.uh):sat">, V6_vdmpyhsusat_enc; +defm V6_vdmpyhsuisat : + T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.uh,#1):sat">, V6_vdmpyhsuisat_enc; +defm V6_vdmpyhsat : + T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhsat_enc; +defm V6_vdmpyhisat : + T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhisat_enc; +defm V6_vdmpyhb_dv : + T_vmpy_WW <"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_dv_enc; +defm V6_vmpyhss : + T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:sat">, V6_vmpyhss_enc; +defm V6_vmpyhsrs : + T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhsrs_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in +defm V6_vror : T_vmpy_VV <"$dst = vror($src1,$src2)">, V6_vror_enc; + +let Itinerary = CVI_VX, Type = TypeCVI_VX in { +defm V6_vdmpyhb : T_vmpy_VV<"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_enc; +defm V6_vrmpybus : T_vmpy_VV<"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybus_enc; +defm V6_vdmpybus : T_vmpy_VV<"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_enc; +defm V6_vmpyiwb : T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.b)">, V6_vmpyiwb_enc; +defm V6_vrmpyub : T_vmpy_VV<"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyub_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vasrw : T_vmpy_VV <"$dst.w = vasr($src1.w,$src2)">, V6_vasrw_enc; +defm V6_vasrh : T_vmpy_VV <"$dst.h = vasr($src1.h,$src2)">, V6_vasrh_enc; +defm V6_vaslw : T_vmpy_VV <"$dst.w = vasl($src1.w,$src2)">, V6_vaslw_enc; +defm V6_vaslh : T_vmpy_VV <"$dst.h = vasl($src1.h,$src2)">, V6_vaslh_enc; +defm V6_vlsrw : T_vmpy_VV <"$dst.uw = vlsr($src1.uw,$src2)">, V6_vlsrw_enc; +defm V6_vlsrh : T_vmpy_VV <"$dst.uh = vlsr($src1.uh,$src2)">, V6_vlsrh_enc; +} + +let hasNewValue = 1 in +class T_HVX_alu <string asmString, InstrItinClass itin, + RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2), + asmString >{ + let Itinerary = itin; + let Type = !cast<IType>("Type"#itin); +} + +multiclass T_HVX_alu <string asmString, RegisterClass RCout, + RegisterClass RCin, InstrItinClass itin> { + def NAME : T_HVX_alu <asmString, itin, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_alu <asmString, itin, + !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_HVX_alu_VV <string asmString>: + T_HVX_alu <asmString, VectorRegs, VectorRegs, CVI_VA>; + +multiclass T_HVX_alu_WW <string asmString>: + T_HVX_alu <asmString, VecDblRegs, VecDblRegs, CVI_VA_DV>; + +multiclass T_HVX_alu_WV <string asmString>: + T_HVX_alu <asmString, VecDblRegs, VectorRegs, CVI_VX_DV>; + + +let Itinerary = CVI_VX, Type = TypeCVI_VX in { +defm V6_vrmpyubv : + T_HVX_alu_VV <"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyubv_enc; +defm V6_vrmpybv : + T_HVX_alu_VV <"$dst.w = vrmpy($src1.b,$src2.b)">, V6_vrmpybv_enc; +defm V6_vrmpybusv : + T_HVX_alu_VV <"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybusv_enc; +defm V6_vabsdiffub : + T_HVX_alu_VV <"$dst.ub = vabsdiff($src1.ub,$src2.ub)">, V6_vabsdiffub_enc; +defm V6_vabsdiffh : + T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.h,$src2.h)">, V6_vabsdiffh_enc; +defm V6_vabsdiffuh 
: + T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.uh,$src2.uh)">, V6_vabsdiffuh_enc; +defm V6_vabsdiffw : + T_HVX_alu_VV <"$dst.uw = vabsdiff($src1.w,$src2.w)">, V6_vabsdiffw_enc; +} + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vdmpyhvsat : + T_HVX_alu_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhvsat_enc; +defm V6_vmpyhvsrs : + T_HVX_alu_VV<"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhvsrs_enc; +defm V6_vmpyih : + T_HVX_alu_VV <"$dst.h = vmpyi($src1.h,$src2.h)">, V6_vmpyih_enc; +} + +defm V6_vand : + T_HVX_alu_VV <"$dst = vand($src1,$src2)">, V6_vand_enc; +defm V6_vor : + T_HVX_alu_VV <"$dst = vor($src1,$src2)">, V6_vor_enc; +defm V6_vxor : + T_HVX_alu_VV <"$dst = vxor($src1,$src2)">, V6_vxor_enc; +defm V6_vaddw : + T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_enc; +defm V6_vaddubsat : + T_HVX_alu_VV <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_enc; +defm V6_vadduhsat : + T_HVX_alu_VV <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_enc; +defm V6_vaddhsat : + T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_enc; +defm V6_vaddwsat : + T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_enc; +defm V6_vsubb : + T_HVX_alu_VV <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_enc; +defm V6_vsubh : + T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_enc; +defm V6_vsubw : + T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_enc; +defm V6_vsububsat : + T_HVX_alu_VV <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_enc; +defm V6_vsubuhsat : + T_HVX_alu_VV <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_enc; +defm V6_vsubhsat : + T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_enc; +defm V6_vsubwsat : + T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_enc; +defm V6_vavgub : + T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub)">, V6_vavgub_enc; +defm V6_vavguh : + T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh)">, V6_vavguh_enc; +defm V6_vavgh : + T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h)">, V6_vavgh_enc; +defm V6_vavgw : + T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w)">, V6_vavgw_enc; +defm V6_vnavgub : + T_HVX_alu_VV <"$dst.b = vnavg($src1.ub,$src2.ub)">, V6_vnavgub_enc; +defm V6_vnavgh : + T_HVX_alu_VV <"$dst.h = vnavg($src1.h,$src2.h)">, V6_vnavgh_enc; +defm V6_vnavgw : + T_HVX_alu_VV <"$dst.w = vnavg($src1.w,$src2.w)">, V6_vnavgw_enc; +defm V6_vavgubrnd : + T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub):rnd">, V6_vavgubrnd_enc; +defm V6_vavguhrnd : + T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh):rnd">, V6_vavguhrnd_enc; +defm V6_vavghrnd : + T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h):rnd">, V6_vavghrnd_enc; +defm V6_vavgwrnd : + T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w):rnd">, V6_vavgwrnd_enc; + +defm V6_vmpybv : + T_HVX_alu_WV <"$dst.h = vmpy($src1.b,$src2.b)">, V6_vmpybv_enc; +defm V6_vmpyubv : + T_HVX_alu_WV <"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyubv_enc; +defm V6_vmpybusv : + T_HVX_alu_WV <"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybusv_enc; +defm V6_vmpyhv : + T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyhv_enc; +defm V6_vmpyuhv : + T_HVX_alu_WV <"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuhv_enc; +defm V6_vmpyhus : + T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.uh)">, V6_vmpyhus_enc; +defm V6_vaddubh : + T_HVX_alu_WV <"$dst.h = vadd($src1.ub,$src2.ub)">, V6_vaddubh_enc; +defm V6_vadduhw : + T_HVX_alu_WV <"$dst.w = vadd($src1.uh,$src2.uh)">, V6_vadduhw_enc; +defm V6_vaddhw : + T_HVX_alu_WV <"$dst.w = 
vadd($src1.h,$src2.h)">, V6_vaddhw_enc; +defm V6_vsububh : + T_HVX_alu_WV <"$dst.h = vsub($src1.ub,$src2.ub)">, V6_vsububh_enc; +defm V6_vsubuhw : + T_HVX_alu_WV <"$dst.w = vsub($src1.uh,$src2.uh)">, V6_vsubuhw_enc; +defm V6_vsubhw : + T_HVX_alu_WV <"$dst.w = vsub($src1.h,$src2.h)">, V6_vsubhw_enc; + +defm V6_vaddb_dv : + T_HVX_alu_WW <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_dv_enc; +defm V6_vaddh_dv : + T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_dv_enc; +defm V6_vaddw_dv : + T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_dv_enc; +defm V6_vaddubsat_dv : + T_HVX_alu_WW <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_dv_enc; +defm V6_vadduhsat_dv : + T_HVX_alu_WW <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_dv_enc; +defm V6_vaddhsat_dv : + T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_dv_enc; +defm V6_vaddwsat_dv : + T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_dv_enc; +defm V6_vsubb_dv : + T_HVX_alu_WW <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_dv_enc; +defm V6_vsubh_dv : + T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_dv_enc; +defm V6_vsubw_dv : + T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_dv_enc; +defm V6_vsububsat_dv : + T_HVX_alu_WW <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_dv_enc; +defm V6_vsubuhsat_dv : + T_HVX_alu_WW <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_dv_enc; +defm V6_vsubhsat_dv : + T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_dv_enc; +defm V6_vsubwsat_dv : + T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_dv_enc; + +let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV in { +defm V6_vmpabusv : + T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabusv_enc; +defm V6_vmpabuuv : + T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.ub)">, V6_vmpabuuv_enc; +} + +let isAccumulator = 1, hasNewValue = 1 in +class T_HVX_vmpyacc <string asmString, InstrItinClass itin, RegisterClass RCout, + RegisterClass RCin1, RegisterClass RCin2> + : CVI_VA_Resource1 <(outs RCout:$dst), + (ins RCout:$_src_, RCin1:$src1, RCin2:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = itin; + let Type = !cast<IType>("Type"#itin); +} + +multiclass T_HVX_vmpyacc_both <string asmString, RegisterClass RCout, + RegisterClass RCin1, RegisterClass RCin2, InstrItinClass itin > { + def NAME : T_HVX_vmpyacc <asmString, itin, RCout, RCin1, RCin2>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpyacc <asmString, itin, + !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin1#"128B"), + !cast<RegisterClass>(RCin2# + !if(!eq (!cast<string>(RCin2), "IntRegs"), "", "128B"))>; +} + +multiclass T_HVX_vmpyacc_VVR <string asmString>: + T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, IntRegs, CVI_VX>; + +multiclass T_HVX_vmpyacc_VWR <string asmString>: + T_HVX_vmpyacc_both <asmString, VectorRegs, VecDblRegs, IntRegs, CVI_VX_DV>; + +multiclass T_HVX_vmpyacc_WVR <string asmString>: + T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, IntRegs, CVI_VX_DV>; + +multiclass T_HVX_vmpyacc_WWR <string asmString>: + T_HVX_vmpyacc_both <asmString, VecDblRegs, VecDblRegs, IntRegs, CVI_VX_DV>; + +multiclass T_HVX_vmpyacc_VVV <string asmString>: + T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, VectorRegs, CVI_VX_DV>; + +multiclass T_HVX_vmpyacc_WVV <string asmString>: + T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, VectorRegs, CVI_VX_DV>; + + +defm V6_vtmpyb_acc : + T_HVX_vmpyacc_WWR <"$dst.h += 
vtmpy($src1.b,$src2.b)">, + V6_vtmpyb_acc_enc; +defm V6_vtmpybus_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.ub,$src2.b)">, + V6_vtmpybus_acc_enc; +defm V6_vtmpyhb_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vtmpy($src1.h,$src2.b)">, + V6_vtmpyhb_acc_enc; +defm V6_vdmpyhb_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.b)">, + V6_vdmpyhb_acc_enc; +defm V6_vrmpyub_acc : + T_HVX_vmpyacc_VVR <"$dst.uw += vrmpy($src1.ub,$src2.ub)">, + V6_vrmpyub_acc_enc; +defm V6_vrmpybus_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vrmpy($src1.ub,$src2.b)">, + V6_vrmpybus_acc_enc; +defm V6_vdmpybus_acc : + T_HVX_vmpyacc_VVR <"$dst.h += vdmpy($src1.ub,$src2.b)">, + V6_vdmpybus_acc_enc; +defm V6_vdmpybus_dv_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vdmpy($src1.ub,$src2.b)">, + V6_vdmpybus_dv_acc_enc; +defm V6_vdmpyhsuisat_acc : + T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.uh,#1):sat">, + V6_vdmpyhsuisat_acc_enc; +defm V6_vdmpyhisat_acc : + T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhisat_acc_enc; +defm V6_vdmpyhb_dv_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vdmpy($src1.h,$src2.b)">, + V6_vdmpyhb_dv_acc_enc; +defm V6_vmpybus_acc : + T_HVX_vmpyacc_WVR <"$dst.h += vmpy($src1.ub,$src2.b)">, + V6_vmpybus_acc_enc; +defm V6_vmpabus_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vmpa($src1.ub,$src2.b)">, + V6_vmpabus_acc_enc; +defm V6_vmpahb_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vmpa($src1.h,$src2.b)">, + V6_vmpahb_acc_enc; +defm V6_vmpyhsat_acc : + T_HVX_vmpyacc_WVR <"$dst.w += vmpy($src1.h,$src2.h):sat">, + V6_vmpyhsat_acc_enc; +defm V6_vmpyuh_acc : + T_HVX_vmpyacc_WVR <"$dst.uw += vmpy($src1.uh,$src2.uh)">, + V6_vmpyuh_acc_enc; +defm V6_vmpyiwb_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vmpyi($src1.w,$src2.b)">, + V6_vmpyiwb_acc_enc; +defm V6_vdsaduh_acc : + T_HVX_vmpyacc_WWR <"$dst.uw += vdsad($src1.uh,$src2.uh)">, + V6_vdsaduh_acc_enc; +defm V6_vmpyihb_acc : + T_HVX_vmpyacc_VVR <"$dst.h += vmpyi($src1.h,$src2.b)">, + V6_vmpyihb_acc_enc; +defm V6_vmpyub_acc : + T_HVX_vmpyacc_WVR <"$dst.uh += vmpy($src1.ub,$src2.ub)">, + V6_vmpyub_acc_enc; + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vdmpyhsusat_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.uh):sat">, + V6_vdmpyhsusat_acc_enc; +defm V6_vdmpyhsat_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhsat_acc_enc; +defm V6_vmpyiwh_acc : T_HVX_vmpyacc_VVR + <"$dst.w += vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_acc_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vaslw_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vasl($src1.w,$src2)">, V6_vaslw_acc_enc; +defm V6_vasrw_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vasr($src1.w,$src2)">, V6_vasrw_acc_enc; +} + +defm V6_vdmpyhvsat_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhvsat_acc_enc; +defm V6_vmpybusv_acc : + T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.ub,$src2.b)">, + V6_vmpybusv_acc_enc; +defm V6_vmpybv_acc : + T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.b,$src2.b)">, V6_vmpybv_acc_enc; +defm V6_vmpyhus_acc : + T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.uh)">, V6_vmpyhus_acc_enc; +defm V6_vmpyhv_acc : + T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.h)">, V6_vmpyhv_acc_enc; +defm V6_vmpyiewh_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.h)">, + V6_vmpyiewh_acc_enc; +defm V6_vmpyiewuh_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.uh)">, + V6_vmpyiewuh_acc_enc; +defm V6_vmpyih_acc : + T_HVX_vmpyacc_VVV <"$dst.h += vmpyi($src1.h,$src2.h)">, V6_vmpyih_acc_enc; +defm V6_vmpyowh_rnd_sacc : + 
T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:rnd:sat:shift">, + V6_vmpyowh_rnd_sacc_enc; +defm V6_vmpyowh_sacc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:sat:shift">, + V6_vmpyowh_sacc_enc; +defm V6_vmpyubv_acc : + T_HVX_vmpyacc_WVV <"$dst.uh += vmpy($src1.ub,$src2.ub)">, + V6_vmpyubv_acc_enc; +defm V6_vmpyuhv_acc : + T_HVX_vmpyacc_WVV <"$dst.uw += vmpy($src1.uh,$src2.uh)">, + V6_vmpyuhv_acc_enc; +defm V6_vrmpybusv_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.ub,$src2.b)">, + V6_vrmpybusv_acc_enc; +defm V6_vrmpybv_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.b,$src2.b)">, V6_vrmpybv_acc_enc; +defm V6_vrmpyubv_acc : + T_HVX_vmpyacc_VVV <"$dst.uw += vrmpy($src1.ub,$src2.ub)">, + V6_vrmpyubv_acc_enc; + + +class T_HVX_vcmp <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 <(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, RCin:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = CVI_VA; + let Type = TypeCVI_VA; +} + +multiclass T_HVX_vcmp <string asmString> { + def NAME : T_HVX_vcmp <asmString, VecPredRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vcmp <asmString, VecPredRegs128B, VectorRegs128B>; +} + +defm V6_veqb_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.b,$src2.b)">, V6_veqb_and_enc; +defm V6_veqh_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.h,$src2.h)">, V6_veqh_and_enc; +defm V6_veqw_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.w,$src2.w)">, V6_veqw_and_enc; +defm V6_vgtb_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_and_enc; +defm V6_vgth_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.h,$src2.h)">, V6_vgth_and_enc; +defm V6_vgtw_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_and_enc; +defm V6_vgtub_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_and_enc; +defm V6_vgtuh_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_and_enc; +defm V6_vgtuw_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_and_enc; +defm V6_veqb_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.b,$src2.b)">, V6_veqb_or_enc; +defm V6_veqh_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.h,$src2.h)">, V6_veqh_or_enc; +defm V6_veqw_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.w,$src2.w)">, V6_veqw_or_enc; +defm V6_vgtb_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_or_enc; +defm V6_vgth_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.h,$src2.h)">, V6_vgth_or_enc; +defm V6_vgtw_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_or_enc; +defm V6_vgtub_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_or_enc; +defm V6_vgtuh_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_or_enc; +defm V6_vgtuw_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_or_enc; +defm V6_veqb_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.b,$src2.b)">, V6_veqb_xor_enc; +defm V6_veqh_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.h,$src2.h)">, V6_veqh_xor_enc; +defm V6_veqw_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.w,$src2.w)">, V6_veqw_xor_enc; +defm V6_vgtb_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_xor_enc; +defm V6_vgth_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.h,$src2.h)">, V6_vgth_xor_enc; +defm V6_vgtw_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_xor_enc; +defm V6_vgtub_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_xor_enc; +defm V6_vgtuh_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_xor_enc; +defm V6_vgtuw_xor : + 
T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_xor_enc; + +defm V6_vminub : + T_HVX_alu_VV <"$dst.ub = vmin($src1.ub,$src2.ub)">, V6_vminub_enc; +defm V6_vminuh : + T_HVX_alu_VV <"$dst.uh = vmin($src1.uh,$src2.uh)">, V6_vminuh_enc; +defm V6_vminh : + T_HVX_alu_VV <"$dst.h = vmin($src1.h,$src2.h)">, V6_vminh_enc; +defm V6_vminw : + T_HVX_alu_VV <"$dst.w = vmin($src1.w,$src2.w)">, V6_vminw_enc; +defm V6_vmaxub : + T_HVX_alu_VV <"$dst.ub = vmax($src1.ub,$src2.ub)">, V6_vmaxub_enc; +defm V6_vmaxuh : + T_HVX_alu_VV <"$dst.uh = vmax($src1.uh,$src2.uh)">, V6_vmaxuh_enc; +defm V6_vmaxh : + T_HVX_alu_VV <"$dst.h = vmax($src1.h,$src2.h)">, V6_vmaxh_enc; +defm V6_vmaxw : + T_HVX_alu_VV <"$dst.w = vmax($src1.w,$src2.w)">, V6_vmaxw_enc; +defm V6_vshuffeb : + T_HVX_alu_VV <"$dst.b = vshuffe($src1.b,$src2.b)">, V6_vshuffeb_enc; +defm V6_vshuffob : + T_HVX_alu_VV <"$dst.b = vshuffo($src1.b,$src2.b)">, V6_vshuffob_enc; +defm V6_vshufeh : + T_HVX_alu_VV <"$dst.h = vshuffe($src1.h,$src2.h)">, V6_vshufeh_enc; +defm V6_vshufoh : + T_HVX_alu_VV <"$dst.h = vshuffo($src1.h,$src2.h)">, V6_vshufoh_enc; + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vmpyowh_rnd : + T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:rnd:sat">, + V6_vmpyowh_rnd_enc; +defm V6_vmpyiewuh : + T_HVX_alu_VV <"$dst.w = vmpyie($src1.w,$src2.uh)">, V6_vmpyiewuh_enc; +defm V6_vmpyewuh : + T_HVX_alu_VV <"$dst.w = vmpye($src1.w,$src2.uh)">, V6_vmpyewuh_enc; +defm V6_vmpyowh : + T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:sat">, V6_vmpyowh_enc; +defm V6_vmpyiowh : + T_HVX_alu_VV <"$dst.w = vmpyio($src1.w,$src2.h)">, V6_vmpyiowh_enc; +} +let Itinerary = CVI_VX, Type = TypeCVI_VX in +defm V6_vmpyieoh : + T_HVX_alu_VV <"$dst.w = vmpyieo($src1.h,$src2.h)">, V6_vmpyieoh_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in { +defm V6_vshufoeh : + T_HVX_alu_WV <"$dst.h = vshuffoe($src1.h,$src2.h)">, V6_vshufoeh_enc; +defm V6_vshufoeb : + T_HVX_alu_WV <"$dst.b = vshuffoe($src1.b,$src2.b)">, V6_vshufoeb_enc; +} + +let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in +defm V6_vcombine : + T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc; + +def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, + SDTCisSubVecOfVec<1, 0>]>; + +def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; + +def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs), + (v16i32 VectorRegs:$Vt))), + (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), + (v32i32 VecDblRegs:$Vt))), + (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; + +let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in { +defm V6_vsathub : + T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc; +defm V6_vsatwh : + T_HVX_alu_VV <"$dst.h = vsat($src1.w,$src2.w)">, V6_vsatwh_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vroundwh : + T_HVX_alu_VV <"$dst.h = vround($src1.w,$src2.w):sat">, V6_vroundwh_enc; +defm V6_vroundwuh : + T_HVX_alu_VV <"$dst.uh = vround($src1.w,$src2.w):sat">, V6_vroundwuh_enc; +defm V6_vroundhb : + T_HVX_alu_VV <"$dst.b = vround($src1.h,$src2.h):sat">, V6_vroundhb_enc; +defm V6_vroundhub : + T_HVX_alu_VV <"$dst.ub = vround($src1.h,$src2.h):sat">, V6_vroundhub_enc; +defm V6_vasrwv : + T_HVX_alu_VV <"$dst.w = vasr($src1.w,$src2.w)">, V6_vasrwv_enc; +defm V6_vlsrwv : + T_HVX_alu_VV <"$dst.w = vlsr($src1.w,$src2.w)">, V6_vlsrwv_enc; +defm V6_vlsrhv : + 
T_HVX_alu_VV <"$dst.h = vlsr($src1.h,$src2.h)">, V6_vlsrhv_enc; +defm V6_vasrhv : + T_HVX_alu_VV <"$dst.h = vasr($src1.h,$src2.h)">, V6_vasrhv_enc; +defm V6_vaslwv : + T_HVX_alu_VV <"$dst.w = vasl($src1.w,$src2.w)">, V6_vaslwv_enc; +defm V6_vaslhv : + T_HVX_alu_VV <"$dst.h = vasl($src1.h,$src2.h)">, V6_vaslhv_enc; +} + +defm V6_vaddb : + T_HVX_alu_VV <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_enc; +defm V6_vaddh : + T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in { +defm V6_vdelta : + T_HVX_alu_VV <"$dst = vdelta($src1,$src2)">, V6_vdelta_enc; +defm V6_vrdelta : + T_HVX_alu_VV <"$dst = vrdelta($src1,$src2)">, V6_vrdelta_enc; +defm V6_vdealb4w : + T_HVX_alu_VV <"$dst.b = vdeale($src1.b,$src2.b)">, V6_vdealb4w_enc; +defm V6_vpackeb : + T_HVX_alu_VV <"$dst.b = vpacke($src1.h,$src2.h)">, V6_vpackeb_enc; +defm V6_vpackeh : + T_HVX_alu_VV <"$dst.h = vpacke($src1.w,$src2.w)">, V6_vpackeh_enc; +defm V6_vpackhub_sat : + T_HVX_alu_VV <"$dst.ub = vpack($src1.h,$src2.h):sat">, V6_vpackhub_sat_enc; +defm V6_vpackhb_sat : + T_HVX_alu_VV <"$dst.b = vpack($src1.h,$src2.h):sat">, V6_vpackhb_sat_enc; +defm V6_vpackwuh_sat : + T_HVX_alu_VV <"$dst.uh = vpack($src1.w,$src2.w):sat">, V6_vpackwuh_sat_enc; +defm V6_vpackwh_sat : + T_HVX_alu_VV <"$dst.h = vpack($src1.w,$src2.w):sat">, V6_vpackwh_sat_enc; +defm V6_vpackob : + T_HVX_alu_VV <"$dst.b = vpacko($src1.h,$src2.h)">, V6_vpackob_enc; +defm V6_vpackoh : + T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc; +} + +let hasNewValue = 1, hasSideEffects = 0 in +class T_HVX_condALU <string asmString, RegisterClass RC1, RegisterClass RC2> + : CVI_VA_Resource1 <(outs RC2:$dst), + (ins RC1:$src1, RC2:$_src_, RC2:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = CVI_VA; + let Type = TypeCVI_VA; +} + +multiclass T_HVX_condALU <string asmString> { + def NAME : T_HVX_condALU <asmString, VecPredRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_condALU <asmString, VecPredRegs128B, VectorRegs128B>; +} + +defm V6_vaddbq : T_HVX_condALU <"if ($src1) $dst.b += $src2.b">, + V6_vaddbq_enc; +defm V6_vaddhq : T_HVX_condALU <"if ($src1) $dst.h += $src2.h">, + V6_vaddhq_enc; +defm V6_vaddwq : T_HVX_condALU <"if ($src1) $dst.w += $src2.w">, + V6_vaddwq_enc; +defm V6_vsubbq : T_HVX_condALU <"if ($src1) $dst.b -= $src2.b">, + V6_vsubbq_enc; +defm V6_vsubhq : T_HVX_condALU <"if ($src1) $dst.h -= $src2.h">, + V6_vsubhq_enc; +defm V6_vsubwq : T_HVX_condALU <"if ($src1) $dst.w -= $src2.w">, + V6_vsubwq_enc; +defm V6_vaddbnq : T_HVX_condALU <"if (!$src1) $dst.b += $src2.b">, + V6_vaddbnq_enc; +defm V6_vaddhnq : T_HVX_condALU <"if (!$src1) $dst.h += $src2.h">, + V6_vaddhnq_enc; +defm V6_vaddwnq : T_HVX_condALU <"if (!$src1) $dst.w += $src2.w">, + V6_vaddwnq_enc; +defm V6_vsubbnq : T_HVX_condALU <"if (!$src1) $dst.b -= $src2.b">, + V6_vsubbnq_enc; +defm V6_vsubhnq : T_HVX_condALU <"if (!$src1) $dst.h -= $src2.h">, + V6_vsubhnq_enc; +defm V6_vsubwnq : T_HVX_condALU <"if (!$src1) $dst.w -= $src2.w">, + V6_vsubwnq_enc; + +let hasNewValue = 1 in +class T_HVX_alu_2op <string asmString, InstrItinClass itin, + RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1), + asmString >{ + let Itinerary = itin; + let Type = !cast<IType>("Type"#itin); +} + +multiclass T_HVX_alu_2op <string asmString, RegisterClass RCout, + RegisterClass RCin, InstrItinClass itin> { + def NAME : T_HVX_alu_2op <asmString, itin, RCout, RCin>; + let isCodeGenOnly = 1 in + 
def NAME#_128B : T_HVX_alu_2op <asmString, itin, + !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +let hasNewValue = 1 in +multiclass T_HVX_alu_2op_VV <string asmString>: + T_HVX_alu_2op <asmString, VectorRegs, VectorRegs, CVI_VA>; + +multiclass T_HVX_alu_2op_WV <string asmString>: + T_HVX_alu_2op <asmString, VecDblRegs, VectorRegs, CVI_VA_DV>; + + +defm V6_vabsh : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h)">, + V6_vabsh_enc; +defm V6_vabsw : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w)">, + V6_vabsw_enc; +defm V6_vabsh_sat : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h):sat">, + V6_vabsh_sat_enc; +defm V6_vabsw_sat : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w):sat">, + V6_vabsw_sat_enc; +defm V6_vnot : T_HVX_alu_2op_VV <"$dst = vnot($src1)">, + V6_vnot_enc; +defm V6_vassign : T_HVX_alu_2op_VV <"$dst = $src1">, + V6_vassign_enc; + +defm V6_vzb : T_HVX_alu_2op_WV <"$dst.uh = vzxt($src1.ub)">, + V6_vzb_enc; +defm V6_vzh : T_HVX_alu_2op_WV <"$dst.uw = vzxt($src1.uh)">, + V6_vzh_enc; +defm V6_vsb : T_HVX_alu_2op_WV <"$dst.h = vsxt($src1.b)">, + V6_vsb_enc; +defm V6_vsh : T_HVX_alu_2op_WV <"$dst.w = vsxt($src1.h)">, + V6_vsh_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in { +defm V6_vdealh : T_HVX_alu_2op_VV <"$dst.h = vdeal($src1.h)">, + V6_vdealh_enc; +defm V6_vdealb : T_HVX_alu_2op_VV <"$dst.b = vdeal($src1.b)">, + V6_vdealb_enc; +defm V6_vshuffh : T_HVX_alu_2op_VV <"$dst.h = vshuff($src1.h)">, + V6_vshuffh_enc; +defm V6_vshuffb : T_HVX_alu_2op_VV <"$dst.b = vshuff($src1.b)">, + V6_vshuffb_enc; +} + +let Itinerary = CVI_VP_VS, Type = TypeCVI_VP_VS in { +defm V6_vunpackub : T_HVX_alu_2op_WV <"$dst.uh = vunpack($src1.ub)">, + V6_vunpackub_enc; +defm V6_vunpackuh : T_HVX_alu_2op_WV <"$dst.uw = vunpack($src1.uh)">, + V6_vunpackuh_enc; +defm V6_vunpackb : T_HVX_alu_2op_WV <"$dst.h = vunpack($src1.b)">, + V6_vunpackb_enc; +defm V6_vunpackh : T_HVX_alu_2op_WV <"$dst.w = vunpack($src1.h)">, + V6_vunpackh_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vcl0w : T_HVX_alu_2op_VV <"$dst.uw = vcl0($src1.uw)">, + V6_vcl0w_enc; +defm V6_vcl0h : T_HVX_alu_2op_VV <"$dst.uh = vcl0($src1.uh)">, + V6_vcl0h_enc; +defm V6_vnormamtw : T_HVX_alu_2op_VV <"$dst.w = vnormamt($src1.w)">, + V6_vnormamtw_enc; +defm V6_vnormamth : T_HVX_alu_2op_VV <"$dst.h = vnormamt($src1.h)">, + V6_vnormamth_enc; +defm V6_vpopcounth : T_HVX_alu_2op_VV <"$dst.h = vpopcount($src1.h)">, + V6_vpopcounth_enc; +} + +let isAccumulator = 1, hasNewValue = 1, Itinerary = CVI_VX_DV_LONG, + Type = TypeCVI_VX_DV in +class T_HVX_vmpyacc2 <string asmString, RegisterClass RC> + : CVI_VA_Resource1 <(outs RC:$dst), + (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1Imm:$src3), + asmString, [], "$dst = $_src_" > ; + + +multiclass T_HVX_vmpyacc2 <string asmString> { + def NAME : T_HVX_vmpyacc2 <asmString, VecDblRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpyacc2 <asmString, VecDblRegs128B>; +} + +defm V6_vrmpybusi_acc : + T_HVX_vmpyacc2<"$dst.w += vrmpy($src1.ub,$src2.b,#$src3)">, + V6_vrmpybusi_acc_enc; +defm V6_vrsadubi_acc : + T_HVX_vmpyacc2<"$dst.uw += vrsad($src1.ub,$src2.ub,#$src3)">, + V6_vrsadubi_acc_enc; +defm V6_vrmpyubi_acc : + T_HVX_vmpyacc2<"$dst.uw += vrmpy($src1.ub,$src2.ub,#$src3)">, + V6_vrmpyubi_acc_enc; + + +let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV, hasNewValue = 1 in +class T_HVX_vmpy2 <string asmString, RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1Imm:$src3), + asmString>; + + +multiclass T_HVX_vmpy2 <string asmString> { + 
def NAME : T_HVX_vmpy2 <asmString, VecDblRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpy2 <asmString, VecDblRegs128B>; +} + +defm V6_vrmpybusi : + T_HVX_vmpy2 <"$dst.w = vrmpy($src1.ub,$src2.b,#$src3)">, V6_vrmpybusi_enc; +defm V6_vrsadubi : + T_HVX_vmpy2 <"$dst.uw = vrsad($src1.ub,$src2.ub,#$src3)">, V6_vrsadubi_enc; +defm V6_vrmpyubi : + T_HVX_vmpy2 <"$dst.uw = vrmpy($src1.ub,$src2.ub,#$src3)">, V6_vrmpyubi_enc; + + +let Itinerary = CVI_VP_VS_LONG_EARLY, Type = TypeCVI_VP_VS, + hasSideEffects = 0, hasNewValue2 = 1, opNewValue2 = 1 in +class T_HVX_perm <string asmString, RegisterClass RC> + : CVI_VA_Resource1 <(outs RC:$_dst1_, RC:$_dst2_), + (ins RC:$src1, RC:$src2, IntRegs:$src3), + asmString, [], "$_dst1_ = $src1, $_dst2_ = $src2" >; + +multiclass T_HVX_perm <string asmString> { + def NAME : T_HVX_perm <asmString, VectorRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_perm <asmString, VectorRegs128B>; +} + +let hasNewValue = 1, opNewValue = 0, hasNewValue2 = 1, opNewValue2 = 1 in { + defm V6_vshuff : T_HVX_perm <"vshuff($src1,$src2,$src3)">, V6_vshuff_enc; + defm V6_vdeal : T_HVX_perm <"vdeal($src1,$src2,$src3)">, V6_vdeal_enc; +} + +// Conditional vector move. +let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_HVX_cmov <bit isPredNot, RegisterClass RC> + : CVI_VA_Resource1 <(outs RC:$dst), (ins PredRegs:$src1, RC:$src2), + "if ("#!if(isPredNot, "!", "")#"$src1) $dst = $src2"> { + let isPredicatedFalse = isPredNot; +} + +multiclass T_HVX_cmov <bit isPredNot = 0> { + def NAME : T_HVX_cmov <isPredNot, VectorRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_cmov <isPredNot, VectorRegs128B>; +} + +defm V6_vcmov : T_HVX_cmov, V6_vcmov_enc; +defm V6_vncmov : T_HVX_cmov<1>, V6_vncmov_enc; + +// Conditional vector combine. 
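Nearly every HVX record in this hunk follows the same defm idiom: a 64-byte-mode record plus an isCodeGenOnly twin named NAME#_128B for 128-byte mode. As a point of reference, here is a hand-expanded sketch of the conditional-move defms above, derived directly from the T_HVX_cmov bodies (the V6_vcmov_enc encoding mix-in is omitted):

  def V6_vcmov      : T_HVX_cmov<0, VectorRegs>;      // "if ($src1) $dst = $src2"
  let isCodeGenOnly = 1 in
  def V6_vcmov_128B : T_HVX_cmov<0, VectorRegs128B>;  // 128-byte vector mode only
  // T_HVX_cmov<1> flips the predicate sense, producing V6_vncmov:
  // "if (!$src1) $dst = $src2", with isPredicatedFalse = 1.

The conditional combine defined next uses the same scheme and, like the cmov, predicates on a scalar PredRegs operand rather than an HVX vector predicate (contrast V6_vaddbq earlier, which takes VecPredRegs).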
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, isPredicated = 1, + hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_HVX_ccombine <bit isPredNot, RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 < (outs RCout:$dst), + (ins PredRegs:$src1, RCin:$src2, RCin:$src3), + "if ("#!if(isPredNot, "!", "")#"$src1) $dst = vcombine($src2,$src3)"> { + let isPredicatedFalse = isPredNot; +} + +multiclass T_HVX_ccombine <bit isPredNot = 0> { + def NAME : T_HVX_ccombine <isPredNot, VecDblRegs, VectorRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_ccombine <isPredNot, VecDblRegs128B, VectorRegs128B>; +} + +defm V6_vccombine : T_HVX_ccombine, V6_vccombine_enc; +defm V6_vnccombine : T_HVX_ccombine<1>, V6_vnccombine_enc; + +let hasNewValue = 1 in +class T_HVX_shift <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VX_DV_Resource1<(outs RCout:$dst), + (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), + asmString >; + +multiclass T_HVX_shift <string asmString, RegisterClass RCout, + RegisterClass RCin> { + def NAME : T_HVX_shift <asmString, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_shift <asmString, !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_HVX_shift_VV <string asmString>: + T_HVX_shift <asmString, VectorRegs, VectorRegs>; + +multiclass T_HVX_shift_WV <string asmString>: + T_HVX_shift <asmString, VecDblRegs, VectorRegs>; + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in { +defm V6_valignb : + T_HVX_shift_VV <"$dst = valign($src1,$src2,$src3)">, V6_valignb_enc; +defm V6_vlalignb : + T_HVX_shift_VV <"$dst = vlalign($src1,$src2,$src3)">, V6_vlalignb_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vasrwh : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3)">, V6_vasrwh_enc; +defm V6_vasrwhsat : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):sat">, + V6_vasrwhsat_enc; +defm V6_vasrwhrndsat : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):rnd:sat">, + V6_vasrwhrndsat_enc; +defm V6_vasrwuhsat : + T_HVX_shift_VV <"$dst.uh = vasr($src1.w,$src2.w,$src3):sat">, + V6_vasrwuhsat_enc; +defm V6_vasrhubsat : + T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):sat">, + V6_vasrhubsat_enc; +defm V6_vasrhubrndsat : + T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):rnd:sat">, + V6_vasrhubrndsat_enc; +defm V6_vasrhbrndsat : + T_HVX_shift_VV <"$dst.b = vasr($src1.h,$src2.h,$src3):rnd:sat">, + V6_vasrhbrndsat_enc; +} + +// Assembler mapped -- alias? 
+//defm V6_vtran2x2vdd : T_HVX_shift_VV <"">, V6_vtran2x2vdd_enc; +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in { +defm V6_vshuffvdd : + T_HVX_shift_WV <"$dst = vshuff($src1,$src2,$src3)">, V6_vshuffvdd_enc; +defm V6_vdealvdd : + T_HVX_shift_WV <"$dst = vdeal($src1,$src2,$src3)">, V6_vdealvdd_enc; +} + +let hasNewValue = 1, Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in +class T_HVX_unpack <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCout:$_src_, RCin:$src1), + asmString, [], "$dst = $_src_">; + +multiclass T_HVX_unpack <string asmString> { + def NAME : T_HVX_unpack <asmString, VecDblRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_unpack <asmString, VecDblRegs128B, VectorRegs128B>; +} + +defm V6_vunpackob : T_HVX_unpack <"$dst.h |= vunpacko($src1.b)">, V6_vunpackob_enc; +defm V6_vunpackoh : T_HVX_unpack <"$dst.w |= vunpacko($src1.h)">, V6_vunpackoh_enc; + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1, + hasSideEffects = 0 in +class T_HVX_valign <string asmString, RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3Imm:$src3), + asmString>; + +multiclass T_HVX_valign <string asmString> { + def NAME : T_HVX_valign <asmString, VectorRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_valign <asmString, VectorRegs128B>; +} + +defm V6_valignbi : + T_HVX_valign <"$dst = valign($src1,$src2,#$src3)">, V6_valignbi_enc; +defm V6_vlalignbi : + T_HVX_valign <"$dst = vlalign($src1,$src2,#$src3)">, V6_vlalignbi_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in +class T_HVX_predAlu <string asmString, RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2), + asmString>; + +multiclass T_HVX_predAlu <string asmString> { + def NAME : T_HVX_predAlu <asmString, VecPredRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_predAlu <asmString, VecPredRegs128B>; +} + +defm V6_pred_and : T_HVX_predAlu <"$dst = and($src1,$src2)">, V6_pred_and_enc; +defm V6_pred_or : T_HVX_predAlu <"$dst = or($src1,$src2)">, V6_pred_or_enc; +defm V6_pred_xor : T_HVX_predAlu <"$dst = xor($src1,$src2)">, V6_pred_xor_enc; +defm V6_pred_or_n : T_HVX_predAlu <"$dst = or($src1,!$src2)">, V6_pred_or_n_enc; +defm V6_pred_and_n : + T_HVX_predAlu <"$dst = and($src1,!$src2)">, V6_pred_and_n_enc; + +let Itinerary = CVI_VA, Type = TypeCVI_VA in +class T_HVX_prednot <RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1), + "$dst = not($src1)">, V6_pred_not_enc; + +def V6_pred_not : T_HVX_prednot <VecPredRegs>; +let isCodeGenOnly = 1 in +def V6_pred_not_128B : T_HVX_prednot <VecPredRegs128B>; + +let Itinerary = CVI_VA, Type = TypeCVI_VA in +class T_HVX_vcmp2 <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2), + asmString >; + +multiclass T_HVX_vcmp2 <string asmString> { + def NAME : T_HVX_vcmp2 <asmString, VecPredRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vcmp2 <asmString, VecPredRegs128B, VectorRegs128B>; +} + +defm V6_veqb : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.b,$src2.b)">, V6_veqb_enc; +defm V6_veqh : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.h,$src2.h)">, V6_veqh_enc; +defm V6_veqw : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.w,$src2.w)">, V6_veqw_enc; +defm V6_vgtb : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.b,$src2.b)">, V6_vgtb_enc; +defm V6_vgth : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.h,$src2.h)">, V6_vgth_enc; +defm V6_vgtw : T_HVX_vcmp2 <"$dst = 
vcmp.gt($src1.w,$src2.w)">, V6_vgtw_enc; +defm V6_vgtub : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_enc; +defm V6_vgtuh : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_enc; +defm V6_vgtuw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_enc; + +let isAccumulator = 1, hasNewValue = 1, hasSideEffects = 0 in +class T_V6_vandqrt_acc <RegisterClass RCout, RegisterClass RCin> + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, IntRegs:$src2), + "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandqrt_acc_enc; + +def V6_vandqrt_acc : T_V6_vandqrt_acc <VectorRegs, VecPredRegs>; +let isCodeGenOnly = 1 in +def V6_vandqrt_acc_128B : T_V6_vandqrt_acc <VectorRegs128B, VecPredRegs128B>; + +let isAccumulator = 1 in +class T_V6_vandvrt_acc <RegisterClass RCout, RegisterClass RCin> + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, IntRegs:$src2), + "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandvrt_acc_enc; + +def V6_vandvrt_acc : T_V6_vandvrt_acc <VecPredRegs, VectorRegs>; +let isCodeGenOnly = 1 in +def V6_vandvrt_acc_128B : T_V6_vandvrt_acc <VecPredRegs128B, VectorRegs128B>; + +let hasNewValue = 1, hasSideEffects = 0 in +class T_V6_vandqrt <RegisterClass RCout, RegisterClass RCin> + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCin:$src1, IntRegs:$src2), + "$dst = vand($src1,$src2)" >, V6_vandqrt_enc; + +def V6_vandqrt : T_V6_vandqrt <VectorRegs, VecPredRegs>; +let isCodeGenOnly = 1 in +def V6_vandqrt_128B : T_V6_vandqrt <VectorRegs128B, VecPredRegs128B>; + +let hasNewValue = 1, hasSideEffects = 0 in +class T_V6_lvsplatw <RegisterClass RC> + : CVI_VX_Resource_late<(outs RC:$dst), (ins IntRegs:$src1), + "$dst = vsplat($src1)" >, V6_lvsplatw_enc; + +def V6_lvsplatw : T_V6_lvsplatw <VectorRegs>; +let isCodeGenOnly = 1 in +def V6_lvsplatw_128B : T_V6_lvsplatw <VectorRegs128B>; + + +let hasNewValue = 1 in +class T_V6_vinsertwr <RegisterClass RC> + : CVI_VX_Resource_late<(outs RC:$dst), (ins RC:$_src_, IntRegs:$src1), + "$dst.w = vinsert($src1)", [], "$dst = $_src_">, + V6_vinsertwr_enc; + +def V6_vinsertwr : T_V6_vinsertwr <VectorRegs>; +let isCodeGenOnly = 1 in +def V6_vinsertwr_128B : T_V6_vinsertwr <VectorRegs128B>; + + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in +class T_V6_pred_scalar2 <RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins IntRegs:$src1), + "$dst = vsetq($src1)">, V6_pred_scalar2_enc; + +def V6_pred_scalar2 : T_V6_pred_scalar2 <VecPredRegs>; +let isCodeGenOnly = 1 in +def V6_pred_scalar2_128B : T_V6_pred_scalar2 <VecPredRegs128B>; + +class T_V6_vandvrt <RegisterClass RCout, RegisterClass RCin> + : CVI_VX_Resource_late<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2), + "$dst = vand($src1,$src2)">, V6_vandvrt_enc; + +def V6_vandvrt : T_V6_vandvrt <VecPredRegs, VectorRegs>; +let isCodeGenOnly = 1 in +def V6_vandvrt_128B : T_V6_vandvrt <VecPredRegs128B, VectorRegs128B>; + +let validSubTargets = HasV60SubT in +class T_HVX_rol <string asmString, RegisterClass RC, Operand ImmOp > + : SInst2 <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2), asmString>; + +class T_HVX_rol_R <string asmString> + : T_HVX_rol <asmString, IntRegs, u5Imm>; +class T_HVX_rol_P <string asmString> + : T_HVX_rol <asmString, DoubleRegs, u6Imm>; + +def S6_rol_i_p : T_HVX_rol_P <"$dst = rol($src1,#$src2)">, S6_rol_i_p_enc; +let hasNewValue = 1, opNewValue = 0 in +def S6_rol_i_r : T_HVX_rol_R <"$dst = rol($src1,#$src2)">, S6_rol_i_r_enc; + +let validSubTargets = HasV60SubT in +class T_HVX_rol_acc 
<string asmString, RegisterClass RC, Operand ImmOp> + : SInst2 <(outs RC:$dst), (ins RC:$_src_, RC:$src1, ImmOp:$src2), + asmString, [], "$dst = $_src_" >; + +class T_HVX_rol_acc_P <string asmString> + : T_HVX_rol_acc <asmString, DoubleRegs, u6Imm>; + +class T_HVX_rol_acc_R <string asmString> + : T_HVX_rol_acc <asmString, IntRegs, u5Imm>; + +def S6_rol_i_p_nac : + T_HVX_rol_acc_P <"$dst -= rol($src1,#$src2)">, S6_rol_i_p_nac_enc; +def S6_rol_i_p_acc : + T_HVX_rol_acc_P <"$dst += rol($src1,#$src2)">, S6_rol_i_p_acc_enc; +def S6_rol_i_p_and : + T_HVX_rol_acc_P <"$dst &= rol($src1,#$src2)">, S6_rol_i_p_and_enc; +def S6_rol_i_p_or : + T_HVX_rol_acc_P <"$dst |= rol($src1,#$src2)">, S6_rol_i_p_or_enc; +def S6_rol_i_p_xacc : + T_HVX_rol_acc_P<"$dst ^= rol($src1,#$src2)">, S6_rol_i_p_xacc_enc; + +let hasNewValue = 1, opNewValue = 0 in { +def S6_rol_i_r_nac : + T_HVX_rol_acc_R <"$dst -= rol($src1,#$src2)">, S6_rol_i_r_nac_enc; +def S6_rol_i_r_acc : + T_HVX_rol_acc_R <"$dst += rol($src1,#$src2)">, S6_rol_i_r_acc_enc; +def S6_rol_i_r_and : + T_HVX_rol_acc_R <"$dst &= rol($src1,#$src2)">, S6_rol_i_r_and_enc; +def S6_rol_i_r_or : + T_HVX_rol_acc_R <"$dst |= rol($src1,#$src2)">, S6_rol_i_r_or_enc; +def S6_rol_i_r_xacc : + T_HVX_rol_acc_R <"$dst ^= rol($src1,#$src2)">, S6_rol_i_r_xacc_enc; +} + +let isSolo = 1, Itinerary = LD_tc_ld_SLOT0, Type = TypeLD in +class T_V6_extractw <RegisterClass RC> + : LD1Inst <(outs IntRegs:$dst), (ins RC:$src1, IntRegs:$src2), + "$dst = vextract($src1,$src2)">, V6_extractw_enc; + +def V6_extractw : T_V6_extractw <VectorRegs>; +let isCodeGenOnly = 1 in +def V6_extractw_128B : T_V6_extractw <VectorRegs128B>; + +let Itinerary = ST_tc_st_SLOT0, validSubTargets = HasV55SubT in +class T_sys0op <string asmString> + : ST1Inst <(outs), (ins), asmString>; + +let isSolo = 1, validSubTargets = HasV55SubT in { +def Y5_l2gunlock : T_sys0op <"l2gunlock">, Y5_l2gunlock_enc; +def Y5_l2gclean : T_sys0op <"l2gclean">, Y5_l2gclean_enc; +def Y5_l2gcleaninv : T_sys0op <"l2gcleaninv">, Y5_l2gcleaninv_enc; +} + +class T_sys1op <string asmString, RegisterClass RC> + : ST1Inst <(outs), (ins RC:$src1), asmString>; + +class T_sys1op_R <string asmString> : T_sys1op <asmString, IntRegs>; +class T_sys1op_P <string asmString> : T_sys1op <asmString, DoubleRegs>; + +let isSoloAX = 1, validSubTargets = HasV55SubT in +def Y5_l2unlocka : T_sys1op_R <"l2unlocka($src1)">, Y5_l2unlocka_enc; + +let isSolo = 1, validSubTargets = HasV60SubT in { +def Y6_l2gcleanpa : T_sys1op_P <"l2gclean($src1)">, Y6_l2gcleanpa_enc; +def Y6_l2gcleaninvpa : T_sys1op_P <"l2gcleaninv($src1)">, Y6_l2gcleaninvpa_enc; +} + +let Itinerary = ST_tc_3stall_SLOT0, isPredicateLate = 1, isSoloAX = 1, + validSubTargets = HasV55SubT in +def Y5_l2locka : ST1Inst <(outs PredRegs:$dst), (ins IntRegs:$src1), + "$dst = l2locka($src1)">, Y5_l2locka_enc; + +// not defined on etc side. why? 
+// defm S2_cabacencbin : _VV <"Rdd=encbin(Rss,$src2,Pu)">, S2_cabacencbin_enc; + +let Defs = [USR_OVF], Itinerary = M_tc_3stall_SLOT23, isPredicateLate = 1, + hasSideEffects = 0, +validSubTargets = HasV55SubT in +def A5_ACS : MInst2 <(outs DoubleRegs:$dst1, PredRegs:$dst2), + (ins DoubleRegs:$_src_, DoubleRegs:$src1, DoubleRegs:$src2), + "$dst1,$dst2 = vacsh($src1,$src2)", [], + "$dst1 = $_src_" >, Requires<[HasV55T]>, A5_ACS_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, hasNewValue = 1, + hasSideEffects = 0 in +class T_HVX_alu2 <string asmString, RegisterClass RCout, RegisterClass RCin1, + RegisterClass RCin2> + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCin1:$src1, RCin2:$src2, RCin2:$src3), asmString>; + +multiclass T_HVX_alu2 <string asmString, RegisterClass RC > { + def NAME : T_HVX_alu2 <asmString, RC, VecPredRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_alu2 <asmString, !cast<RegisterClass>(RC#"128B"), + VecPredRegs128B, VectorRegs128B>; +} + +multiclass T_HVX_alu2_V <string asmString> : + T_HVX_alu2 <asmString, VectorRegs>; + +multiclass T_HVX_alu2_W <string asmString> : + T_HVX_alu2 <asmString, VecDblRegs>; + +defm V6_vswap : T_HVX_alu2_W <"$dst = vswap($src1,$src2,$src3)">, V6_vswap_enc; + +let Itinerary = CVI_VA, Type = TypeCVI_VA, hasNewValue = 1, + hasSideEffects = 0 in +defm V6_vmux : T_HVX_alu2_V <"$dst = vmux($src1,$src2,$src3)">, V6_vmux_enc; + +class T_HVX_vlutb <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), asmString>; + +multiclass T_HVX_vlutb <string asmString, RegisterClass RCout, + RegisterClass RCin> { + def NAME : T_HVX_vlutb <asmString, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vlutb <asmString, !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_HVX_vlutb_V <string asmString> : + T_HVX_vlutb <asmString, VectorRegs, VectorRegs>; + +multiclass T_HVX_vlutb_W <string asmString> : + T_HVX_vlutb <asmString, VecDblRegs, VectorRegs>; + +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, isAccumulator = 1 in +class T_HVX_vlutb_acc <string asmString, RegisterClass RCout, + RegisterClass RCin> + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, RCin:$src2, IntRegsLow8:$src3), + asmString, [], "$dst = $_src_">; + +multiclass T_HVX_vlutb_acc <string asmString, RegisterClass RCout, + RegisterClass RCin> { + def NAME : T_HVX_vlutb_acc <asmString, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vlutb_acc<asmString, + !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_HVX_vlutb_acc_V <string asmString> : + T_HVX_vlutb_acc <asmString, VectorRegs, VectorRegs>; + +multiclass T_HVX_vlutb_acc_W <string asmString> : + T_HVX_vlutb_acc <asmString, VecDblRegs, VectorRegs>; + + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1 in +defm V6_vlutvvb: + T_HVX_vlutb_V <"$dst.b = vlut32($src1.b,$src2.b,$src3)">, V6_vlutvvb_enc; + +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, hasNewValue = 1 in +defm V6_vlutvwh: + T_HVX_vlutb_W <"$dst.h = vlut16($src1.b,$src2.h,$src3)">, V6_vlutvwh_enc; + +let hasNewValue = 1 in { + defm V6_vlutvvb_oracc: + T_HVX_vlutb_acc_V <"$dst.b |= vlut32($src1.b,$src2.b,$src3)">, + V6_vlutvvb_oracc_enc; + defm V6_vlutvwh_oracc: + T_HVX_vlutb_acc_W <"$dst.h |= vlut16($src1.b,$src2.h,$src3)">, + V6_vlutvwh_oracc_enc; +} + +// It's a fake instruction and should not 
be defined? +def S2_cabacencbin + : SInst2<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3), + "$dst = encbin($src1,$src2,$src3)">, S2_cabacencbin_enc; + +// Vhist instructions +def V6_vhistq + : CVI_HIST_Resource1 <(outs), (ins VecPredRegs:$src1), + "vhist($src1)">, V6_vhistq_enc; + +def V6_vhist + : CVI_HIST_Resource1 <(outs), (ins), + "vhist" >, V6_vhist_enc; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td index f4fb946d5bad..96dd5315b87f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td @@ -35,6 +35,34 @@ multiclass bitconvert_64<ValueType a, ValueType b> { (a DoubleRegs:$src)>; } +multiclass bitconvert_vec<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a VectorRegs:$src))), + (b VectorRegs:$src)>; + def : Pat <(a (bitconvert (b VectorRegs:$src))), + (a VectorRegs:$src)>; +} + +multiclass bitconvert_dblvec<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a VecDblRegs:$src))), + (b VecDblRegs:$src)>; + def : Pat <(a (bitconvert (b VecDblRegs:$src))), + (a VecDblRegs:$src)>; +} + +multiclass bitconvert_predvec<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a VecPredRegs:$src))), + (b VectorRegs:$src)>; + def : Pat <(a (bitconvert (b VectorRegs:$src))), + (a VecPredRegs:$src)>; +} + +multiclass bitconvert_dblvec128B<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a VecDblRegs128B:$src))), + (b VecDblRegs128B:$src)>; + def : Pat <(a (bitconvert (b VecDblRegs128B:$src))), + (a VecDblRegs128B:$src)>; +} + // Bit convert vector types. defm : bitconvert_32<v4i8, i32>; defm : bitconvert_32<v2i16, i32>; @@ -47,6 +75,21 @@ defm : bitconvert_64<v8i8, v4i16>; defm : bitconvert_64<v8i8, v2i32>; defm : bitconvert_64<v4i16, v2i32>; +defm : bitconvert_vec<v64i8, v16i32>; +defm : bitconvert_vec<v8i64 , v16i32>; +defm : bitconvert_vec<v32i16, v16i32>; + +defm : bitconvert_dblvec<v16i64, v128i8>; +defm : bitconvert_dblvec<v32i32, v128i8>; +defm : bitconvert_dblvec<v64i16, v128i8>; + +defm : bitconvert_dblvec128B<v64i32, v128i16>; +defm : bitconvert_dblvec128B<v256i8, v128i16>; +defm : bitconvert_dblvec128B<v32i64, v128i16>; + +defm : bitconvert_dblvec128B<v64i32, v256i8>; +defm : bitconvert_dblvec128B<v32i64, v256i8>; +defm : bitconvert_dblvec128B<v128i16, v256i8>; // Vector shift support. Vector shifting in Hexagon is rather different // from internal representation of LLVM. 
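The bitconvert_* multiclasses added here are pure reinterpret casts and emit no instructions. Expanded by hand for one instantiation (a sketch that simply follows the multiclass body above):

  defm : bitconvert_vec<v64i8, v16i32>;
  // yields the two no-op patterns:
  def : Pat<(v16i32 (bitconvert (v64i8 VectorRegs:$src))),
            (v16i32 VectorRegs:$src)>;
  def : Pat<(v64i8 (bitconvert (v16i32 VectorRegs:$src))),
            (v64i8 VectorRegs:$src)>;

Contrast the v512i1/v1024i1 predicate casts in HexagonIntrinsicsV60.td below, which are not free: they expand to real V6_vandvrt/V6_vandqrt instructions.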
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td index 1d0d015f798b..b207aaf392f4 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td @@ -691,15 +691,15 @@ def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>; def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>; def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>; -def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s8ExtPred, s8ImmPred>; +def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>; def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))), (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>; // Mux -def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s8ExtPred>; -def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s8ExtPred>; -def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s8ExtPred, s8ImmPred>; +def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>; +def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>; +def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>; // Shift halfword def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>; @@ -720,17 +720,17 @@ def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>; def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>; def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>; -def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s10ExtPred>; -def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s10ExtPred>; -def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u9ExtPred>; +def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>; +def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>; +def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>; -def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s8ExtPred:$src2)), +def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)), (i32 (C2_cmpgti (I32:$src1), - (DEC_CONST_SIGNED s8ExtPred:$src2)))>; + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; -def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u8ExtPred:$src2)), +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)), (i32 (C2_cmpgtui (I32:$src1), - (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>; + (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>; // The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0. def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)), @@ -1289,3 +1289,5 @@ def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>; include "HexagonIntrinsicsV3.td" include "HexagonIntrinsicsV4.td" include "HexagonIntrinsicsV5.td" +include "HexagonIntrinsicsV60.td" + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td new file mode 100644 index 000000000000..24a3e4d36de9 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td @@ -0,0 +1,836 @@ +//=- HexagonIntrinsicsV60.td - Target Description for Hexagon -*- tablegen *-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 Compiler Intrinsics in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1 in { +def HEXAGON_V6_vd0_pseudo : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins ), + "$dst=#0", + [(set VectorRegs:$dst, (int_hexagon_V6_vd0 ))]>; + +def HEXAGON_V6_vd0_pseudo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins ), + "$dst=#0", + [(set VectorRegs128B:$dst, (int_hexagon_V6_vd0_128B ))]>; +} +let isPseudo = 1 in +def HEXAGON_V6_vassignp : CVI_VA_Resource<(outs VecDblRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=vassignp_W($src1)", + [(set VecDblRegs:$dst, (int_hexagon_V6_vassignp VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=vassignp_W_128B($src1)", + [(set VecDblRegs128B:$dst, (int_hexagon_V6_vassignp_128B + VecDblRegs128B:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_lo : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=lo_W($src1)", + [(set VectorRegs:$dst, (int_hexagon_V6_lo VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_hi : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=hi_W($src1)", + [(set VectorRegs:$dst, (int_hexagon_V6_hi VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_lo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=lo_W($src1)", + [(set VectorRegs128B:$dst, (int_hexagon_V6_lo_128B VecDblRegs128B:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_hi_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=hi_W($src1)", + [(set VectorRegs128B:$dst, (int_hexagon_V6_hi_128B VecDblRegs128B:$src1))]>; + +let AddedComplexity = 100 in { +def : Pat < (v16i32 (int_hexagon_V6_lo (v32i32 VecDblRegs:$src1))), + (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_loreg)) >, + Requires<[UseHVXSgl]>; + +def : Pat < (v16i32 (int_hexagon_V6_hi (v32i32 VecDblRegs:$src1))), + (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_hireg)) >, + Requires<[UseHVXSgl]>; + +def : Pat < (v32i32 (int_hexagon_V6_lo_128B (v64i32 VecDblRegs128B:$src1))), + (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), + subreg_loreg)) >, + Requires<[UseHVXDbl]>; + +def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 VecDblRegs128B:$src1))), + (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), + subreg_hireg)) >, + Requires<[UseHVXDbl]>; +} + +def : Pat <(v512i1 (bitconvert (v16i32 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v16i32 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v32i16 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v32i16 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v64i8 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v64i8 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v8i64 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v8i64 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v16i32 (bitconvert (v512i1 VecPredRegs:$src1))), + (v16i32 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v32i16 (bitconvert (v512i1 VecPredRegs:$src1))), + (v32i16 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v64i8 (bitconvert (v512i1 VecPredRegs:$src1))), + (v64i8 (V6_vandqrt(v512i1 
VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v8i64 (bitconvert (v512i1 VecPredRegs:$src1))), + (v8i64 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v1024i1 (bitconvert (v32i32 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v32i32 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v64i16 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v64i16 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v128i8 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v128i8 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v16i64 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v16i64 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v32i32 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v32i32 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v64i16 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v64i16 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v128i8 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v128i8 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v16i64 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v16i64 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +let AddedComplexity = 140 in { +def : Pat <(store (v512i1 VecPredRegs:$src1), (i32 IntRegs:$addr)), + (V6_vS32b_ai IntRegs:$addr, 0, + (v16i32 (V6_vandqrt (v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101))))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (load (i32 IntRegs:$addr))), + (v512i1 (V6_vandvrt + (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(store (v1024i1 VecPredRegs128B:$src1), (i32 IntRegs:$addr)), + (V6_vS32b_ai_128B IntRegs:$addr, 0, + (v32i32 (V6_vandqrt_128B (v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101))))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (load (i32 IntRegs:$addr))), + (v1024i1 (V6_vandvrt_128B + (v32i32 (V6_vL32b_ai_128B IntRegs:$addr, 0)), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; +} + +multiclass T_R_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID IntRegs:$src1), (MI IntRegs:$src1)>, + Requires<[UseHVXSgl]>; + def: Pat<(!cast<Intrinsic>(IntID#"_128B") IntRegs:$src1), + (!cast<InstHexagon>(MI#"_128B") IntRegs:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_V_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1), + (MI VectorRegs:$src1)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_Q_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecPredRegs:$src1), + (MI VecPredRegs:$src1)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1), + (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2), + (MI VecDblRegs:$src1, IntRegs:$src2)>, + 
Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B")VecDblRegs128B:$src1, IntRegs:$src2),
+           (!cast<InstHexagon>(MI#"_128B")VecDblRegs128B:$src1, IntRegs:$src2)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VR_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VectorRegs:$src1, IntRegs:$src2),
+           (MI VectorRegs:$src1, IntRegs:$src2)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B")VectorRegs128B:$src1, IntRegs:$src2),
+           (!cast<InstHexagon>(MI#"_128B")VectorRegs128B:$src1, IntRegs:$src2)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WV_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2),
+           (MI VecDblRegs:$src1, VectorRegs:$src2)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+                                            VectorRegs128B:$src2),
+           (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+                                           VectorRegs128B:$src2)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WW_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2),
+           (MI VecDblRegs:$src1, VecDblRegs:$src2)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+                                            VecDblRegs128B:$src2),
+           (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+                                           VecDblRegs128B:$src2)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VV_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2),
+           (MI VectorRegs:$src1, VectorRegs:$src2)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+                                            VectorRegs128B:$src2),
+           (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+                                           VectorRegs128B:$src2)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_QR_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VecPredRegs:$src1, IntRegs:$src2),
+           (MI VecPredRegs:$src1, IntRegs:$src2)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+                                            IntRegs:$src2),
+           (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+                                           IntRegs:$src2)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_QQ_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VecPredRegs:$src1, VecPredRegs:$src2),
+           (MI VecPredRegs:$src1, VecPredRegs:$src2)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+                                            VecPredRegs128B:$src2),
+           (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+                                           VecPredRegs128B:$src2)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WWR_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3),
+           (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+                                            VecDblRegs128B:$src2,
+                                            IntRegs:$src3),
+           (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+                                           VecDblRegs128B:$src2,
+                                           IntRegs:$src3)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVR_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+           (MI VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+                                            VectorRegs128B:$src2,
+                                            IntRegs:$src3),
+           (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+                                           VectorRegs128B:$src2,
+                                           IntRegs:$src3)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVR_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+           (MI VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+                                            VectorRegs128B:$src2,
+                                            IntRegs:$src3),
+           (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+                                           VectorRegs128B:$src2,
+                                           IntRegs:$src3)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VWR_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3),
+           (MI VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+                                            VecDblRegs128B:$src2,
+                                            IntRegs:$src3),
+           (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+                                           VecDblRegs128B:$src2,
+                                           IntRegs:$src3)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVV_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+           (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+                                            VectorRegs128B:$src2,
+                                            VectorRegs128B:$src3),
+           (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+                                           VectorRegs128B:$src2,
+                                           VectorRegs128B:$src3)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVV_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+           (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+                                            VectorRegs128B:$src2,
+                                            VectorRegs128B:$src3),
+           (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+                                           VectorRegs128B:$src2,
+                                           VectorRegs128B:$src3)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_QVV_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+           (MI VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+                                            VectorRegs128B:$src2,
+                                            VectorRegs128B:$src3),
+           (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+                                           VectorRegs128B:$src2,
+                                           VectorRegs128B:$src3)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VQR_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3),
+           (MI VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+                                            VecPredRegs128B:$src2,
+                                            IntRegs:$src3),
+           (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+                                           VecPredRegs128B:$src2,
+                                           IntRegs:$src3)>,
+       Requires<[UseHVXDbl]>;
+}
+
+
+multiclass T_QVR_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+           (MI VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+                                            VectorRegs128B:$src2,
+                                            IntRegs:$src3),
+           (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+                                           VectorRegs128B:$src2,
+                                           IntRegs:$src3)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVI_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, imm:$src3),
+           (MI VectorRegs:$src1, VectorRegs:$src2, imm:$src3)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+                                            VectorRegs128B:$src2, imm:$src3),
+           (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+                                           VectorRegs128B:$src2, imm:$src3)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WRI_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2, imm:$src3),
+           (MI VecDblRegs:$src1, IntRegs:$src2, imm:$src3)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+                                            IntRegs:$src2, imm:$src3),
+           (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+                                           IntRegs:$src2, imm:$src3)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WWRI_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4),
+           (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+                                            VecDblRegs128B:$src2,
+                                            IntRegs:$src3, imm:$src4),
+           (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+                                           VecDblRegs128B:$src2,
+                                           IntRegs:$src3, imm:$src4)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVVR_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+                  IntRegs:$src4),
+           (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+               IntRegs:$src4)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+                                            VectorRegs128B:$src2,
+                                            VectorRegs128B:$src3,
+                                            IntRegs:$src4),
+           (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+                                           VectorRegs128B:$src2,
+                                           VectorRegs128B:$src3,
+                                           IntRegs:$src4)>,
+       Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVVR_pat <InstHexagon MI, Intrinsic IntID> {
+  def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+                  IntRegs:$src4),
+           (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+               IntRegs:$src4)>,
+       Requires<[UseHVXSgl]>;
+
+  def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+                                            VectorRegs128B:$src2,
+                                            VectorRegs128B:$src3,
+                                            IntRegs:$src4),
+           (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+                                           VectorRegs128B:$src2,
+                                           VectorRegs128B:$src3,
+                                           IntRegs:$src4)>,
+       Requires<[UseHVXDbl]>;
+}
+
+defm : T_WR_pat<V6_vtmpyb, int_hexagon_V6_vtmpyb>;
+defm : T_WR_pat <V6_vtmpybus, int_hexagon_V6_vtmpybus>;
+defm : T_VR_pat <V6_vdmpyhb, int_hexagon_V6_vdmpyhb>;
+defm : T_VR_pat <V6_vrmpyub, int_hexagon_V6_vrmpyub>;
+defm : T_VR_pat <V6_vrmpybus, int_hexagon_V6_vrmpybus>;
+defm : T_WR_pat <V6_vdsaduh, int_hexagon_V6_vdsaduh>;
+defm : T_VR_pat <V6_vdmpybus, int_hexagon_V6_vdmpybus>;
+defm : T_WR_pat <V6_vdmpybus_dv, int_hexagon_V6_vdmpybus_dv>;
+defm : T_VR_pat <V6_vdmpyhsusat, int_hexagon_V6_vdmpyhsusat>;
+defm : T_WR_pat <V6_vdmpyhsuisat, int_hexagon_V6_vdmpyhsuisat>;
+defm : T_VR_pat <V6_vdmpyhsat, int_hexagon_V6_vdmpyhsat>;
+defm : T_WR_pat <V6_vdmpyhisat, int_hexagon_V6_vdmpyhisat>;
+defm : T_WR_pat <V6_vdmpyhb_dv, int_hexagon_V6_vdmpyhb_dv>;
+defm : T_VR_pat <V6_vmpybus, int_hexagon_V6_vmpybus>;
+defm : T_WR_pat <V6_vmpabus, int_hexagon_V6_vmpabus>;
+defm : T_WR_pat <V6_vmpahb, int_hexagon_V6_vmpahb>;
+defm : T_VR_pat <V6_vmpyh, int_hexagon_V6_vmpyh>;
+defm : T_VR_pat <V6_vmpyhss, int_hexagon_V6_vmpyhss>;
+defm : T_VR_pat <V6_vmpyhsrs, int_hexagon_V6_vmpyhsrs>;
+defm : T_VR_pat <V6_vmpyuh, int_hexagon_V6_vmpyuh>;
+defm : T_VR_pat <V6_vmpyihb, int_hexagon_V6_vmpyihb>;
+defm : T_VR_pat <V6_vror, int_hexagon_V6_vror>;
+defm : T_VR_pat <V6_vasrw, int_hexagon_V6_vasrw>;
+defm : T_VR_pat <V6_vasrh, int_hexagon_V6_vasrh>;
+defm : T_VR_pat <V6_vaslw, int_hexagon_V6_vaslw>;
+defm : T_VR_pat <V6_vaslh, int_hexagon_V6_vaslh>;
+defm : T_VR_pat <V6_vlsrw, int_hexagon_V6_vlsrw>;
+defm : T_VR_pat <V6_vlsrh, int_hexagon_V6_vlsrh>;
+defm : T_VR_pat <V6_vmpyiwh, int_hexagon_V6_vmpyiwh>;
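Each T_*_pat multiclass above emits the same intrinsic-to-instruction pattern twice: a single-mode (64-byte vector) pattern guarded by UseHVXSgl, and a double-mode (128-byte) pattern guarded by UseHVXDbl, where !cast splices "_128B" onto both the intrinsic and the instruction name. Every defm instantiation here and below therefore defines a pair of patterns. At the source level this pairing shows up as twin builtins; a minimal sketch, assuming clang's Hexagon HVX builtins __builtin_HEXAGON_V6_vaddw and __builtin_HEXAGON_V6_vaddw_128B and an HVX-enabled target (the typedef and function names are illustrative, not from this diff):

    // Compile for an HVX-enabled Hexagon target.
    typedef int HVX_Vector    __attribute__((__vector_size__(64)));   // 16 x i32
    typedef int HVX_Vector128 __attribute__((__vector_size__(128)));  // 32 x i32

    HVX_Vector add_single(HVX_Vector a, HVX_Vector b) {
      // Should match the UseHVXSgl pattern and select V6_vaddw.
      return __builtin_HEXAGON_V6_vaddw(a, b);
    }

    HVX_Vector128 add_double(HVX_Vector128 a, HVX_Vector128 b) {
      // Should match the UseHVXDbl pattern and select V6_vaddw_128B.
      return __builtin_HEXAGON_V6_vaddw_128B(a, b);
    }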
+defm : T_VR_pat <V6_vmpyiwb, int_hexagon_V6_vmpyiwb>;
+defm : T_WR_pat <V6_vtmpyhb, int_hexagon_V6_vtmpyhb>;
+defm : T_VR_pat <V6_vmpyub, int_hexagon_V6_vmpyub>;
+
+defm : T_VV_pat <V6_vrmpyubv, int_hexagon_V6_vrmpyubv>;
+defm : T_VV_pat <V6_vrmpybv, int_hexagon_V6_vrmpybv>;
+defm : T_VV_pat <V6_vrmpybusv, int_hexagon_V6_vrmpybusv>;
+defm : T_VV_pat <V6_vdmpyhvsat, int_hexagon_V6_vdmpyhvsat>;
+defm : T_VV_pat <V6_vmpybv, int_hexagon_V6_vmpybv>;
+defm : T_VV_pat <V6_vmpyubv, int_hexagon_V6_vmpyubv>;
+defm : T_VV_pat <V6_vmpybusv, int_hexagon_V6_vmpybusv>;
+defm : T_VV_pat <V6_vmpyhv, int_hexagon_V6_vmpyhv>;
+defm : T_VV_pat <V6_vmpyuhv, int_hexagon_V6_vmpyuhv>;
+defm : T_VV_pat <V6_vmpyhvsrs, int_hexagon_V6_vmpyhvsrs>;
+defm : T_VV_pat <V6_vmpyhus, int_hexagon_V6_vmpyhus>;
+defm : T_WW_pat <V6_vmpabusv, int_hexagon_V6_vmpabusv>;
+defm : T_VV_pat <V6_vmpyih, int_hexagon_V6_vmpyih>;
+defm : T_VV_pat <V6_vand, int_hexagon_V6_vand>;
+defm : T_VV_pat <V6_vor, int_hexagon_V6_vor>;
+defm : T_VV_pat <V6_vxor, int_hexagon_V6_vxor>;
+defm : T_VV_pat <V6_vaddw, int_hexagon_V6_vaddw>;
+defm : T_VV_pat <V6_vaddubsat, int_hexagon_V6_vaddubsat>;
+defm : T_VV_pat <V6_vadduhsat, int_hexagon_V6_vadduhsat>;
+defm : T_VV_pat <V6_vaddhsat, int_hexagon_V6_vaddhsat>;
+defm : T_VV_pat <V6_vaddwsat, int_hexagon_V6_vaddwsat>;
+defm : T_VV_pat <V6_vsubb, int_hexagon_V6_vsubb>;
+defm : T_VV_pat <V6_vsubh, int_hexagon_V6_vsubh>;
+defm : T_VV_pat <V6_vsubw, int_hexagon_V6_vsubw>;
+defm : T_VV_pat <V6_vsububsat, int_hexagon_V6_vsububsat>;
+defm : T_VV_pat <V6_vsubuhsat, int_hexagon_V6_vsubuhsat>;
+defm : T_VV_pat <V6_vsubhsat, int_hexagon_V6_vsubhsat>;
+defm : T_VV_pat <V6_vsubwsat, int_hexagon_V6_vsubwsat>;
+defm : T_WW_pat <V6_vaddb_dv, int_hexagon_V6_vaddb_dv>;
+defm : T_WW_pat <V6_vaddh_dv, int_hexagon_V6_vaddh_dv>;
+defm : T_WW_pat <V6_vaddw_dv, int_hexagon_V6_vaddw_dv>;
+defm : T_WW_pat <V6_vaddubsat_dv, int_hexagon_V6_vaddubsat_dv>;
+defm : T_WW_pat <V6_vadduhsat_dv, int_hexagon_V6_vadduhsat_dv>;
+defm : T_WW_pat <V6_vaddhsat_dv, int_hexagon_V6_vaddhsat_dv>;
+defm : T_WW_pat <V6_vaddwsat_dv, int_hexagon_V6_vaddwsat_dv>;
+defm : T_WW_pat <V6_vsubb_dv, int_hexagon_V6_vsubb_dv>;
+defm : T_WW_pat <V6_vsubh_dv, int_hexagon_V6_vsubh_dv>;
+defm : T_WW_pat <V6_vsubw_dv, int_hexagon_V6_vsubw_dv>;
+defm : T_WW_pat <V6_vsububsat_dv, int_hexagon_V6_vsububsat_dv>;
+defm : T_WW_pat <V6_vsubuhsat_dv, int_hexagon_V6_vsubuhsat_dv>;
+defm : T_WW_pat <V6_vsubhsat_dv, int_hexagon_V6_vsubhsat_dv>;
+defm : T_WW_pat <V6_vsubwsat_dv, int_hexagon_V6_vsubwsat_dv>;
+defm : T_VV_pat <V6_vaddubh, int_hexagon_V6_vaddubh>;
+defm : T_VV_pat <V6_vadduhw, int_hexagon_V6_vadduhw>;
+defm : T_VV_pat <V6_vaddhw, int_hexagon_V6_vaddhw>;
+defm : T_VV_pat <V6_vsububh, int_hexagon_V6_vsububh>;
+defm : T_VV_pat <V6_vsubuhw, int_hexagon_V6_vsubuhw>;
+defm : T_VV_pat <V6_vsubhw, int_hexagon_V6_vsubhw>;
+defm : T_VV_pat <V6_vabsdiffub, int_hexagon_V6_vabsdiffub>;
+defm : T_VV_pat <V6_vabsdiffh, int_hexagon_V6_vabsdiffh>;
+defm : T_VV_pat <V6_vabsdiffuh, int_hexagon_V6_vabsdiffuh>;
+defm : T_VV_pat <V6_vabsdiffw, int_hexagon_V6_vabsdiffw>;
+defm : T_VV_pat <V6_vavgub, int_hexagon_V6_vavgub>;
+defm : T_VV_pat <V6_vavguh, int_hexagon_V6_vavguh>;
+defm : T_VV_pat <V6_vavgh, int_hexagon_V6_vavgh>;
+defm : T_VV_pat <V6_vavgw, int_hexagon_V6_vavgw>;
+defm : T_VV_pat <V6_vnavgub, int_hexagon_V6_vnavgub>;
+defm : T_VV_pat <V6_vnavgh, int_hexagon_V6_vnavgh>;
+defm : T_VV_pat <V6_vnavgw, int_hexagon_V6_vnavgw>;
+defm : T_VV_pat <V6_vavgubrnd, int_hexagon_V6_vavgubrnd>;
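Many of the V6 operations in this run differ only in how lane overflow is handled: the *sat variants clamp instead of wrapping. A scalar model of one byte lane of vaddubsat, for orientation only (the instruction itself applies this across all 64 or 128 bytes of a vector; this helper is not part of the diff):

    #include <cstdint>

    // One lane of vaddubsat: unsigned byte add that saturates at 255
    // instead of wrapping around.
    static inline uint8_t addubsat(uint8_t a, uint8_t b) {
      unsigned sum = unsigned(a) + unsigned(b);
      return sum > 255 ? 255 : static_cast<uint8_t>(sum);
    }
    // addubsat(200, 100) == 255, whereas uint8_t(200 + 100) wraps to 44.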
+defm : T_VV_pat <V6_vavguhrnd, int_hexagon_V6_vavguhrnd>; +defm : T_VV_pat <V6_vavghrnd, int_hexagon_V6_vavghrnd>; +defm : T_VV_pat <V6_vavgwrnd, int_hexagon_V6_vavgwrnd>; +defm : T_WW_pat <V6_vmpabuuv, int_hexagon_V6_vmpabuuv>; + +defm : T_VVR_pat <V6_vdmpyhb_acc, int_hexagon_V6_vdmpyhb_acc>; +defm : T_VVR_pat <V6_vrmpyub_acc, int_hexagon_V6_vrmpyub_acc>; +defm : T_VVR_pat <V6_vrmpybus_acc, int_hexagon_V6_vrmpybus_acc>; +defm : T_VVR_pat <V6_vdmpybus_acc, int_hexagon_V6_vdmpybus_acc>; +defm : T_VVR_pat <V6_vdmpyhsusat_acc, int_hexagon_V6_vdmpyhsusat_acc>; +defm : T_VVR_pat <V6_vdmpyhsat_acc, int_hexagon_V6_vdmpyhsat_acc>; +defm : T_VVR_pat <V6_vmpyiwb_acc, int_hexagon_V6_vmpyiwb_acc>; +defm : T_VVR_pat <V6_vmpyiwh_acc, int_hexagon_V6_vmpyiwh_acc>; +defm : T_VVR_pat <V6_vmpyihb_acc, int_hexagon_V6_vmpyihb_acc>; +defm : T_VVR_pat <V6_vaslw_acc, int_hexagon_V6_vaslw_acc>; +defm : T_VVR_pat <V6_vasrw_acc, int_hexagon_V6_vasrw_acc>; + +defm : T_VWR_pat <V6_vdmpyhsuisat_acc, int_hexagon_V6_vdmpyhsuisat_acc>; +defm : T_VWR_pat <V6_vdmpyhisat_acc, int_hexagon_V6_vdmpyhisat_acc>; + +defm : T_WVR_pat <V6_vmpybus_acc, int_hexagon_V6_vmpybus_acc>; +defm : T_WVR_pat <V6_vmpyhsat_acc, int_hexagon_V6_vmpyhsat_acc>; +defm : T_WVR_pat <V6_vmpyuh_acc, int_hexagon_V6_vmpyuh_acc>; +defm : T_WVR_pat <V6_vmpyub_acc, int_hexagon_V6_vmpyub_acc>; + +defm : T_WWR_pat <V6_vtmpyb_acc, int_hexagon_V6_vtmpyb_acc>; +defm : T_WWR_pat <V6_vtmpybus_acc, int_hexagon_V6_vtmpybus_acc>; +defm : T_WWR_pat <V6_vtmpyhb_acc, int_hexagon_V6_vtmpyhb_acc>; +defm : T_WWR_pat <V6_vdmpybus_dv_acc, int_hexagon_V6_vdmpybus_dv_acc>; +defm : T_WWR_pat <V6_vdmpyhb_dv_acc, int_hexagon_V6_vdmpyhb_dv_acc>; +defm : T_WWR_pat <V6_vmpabus_acc, int_hexagon_V6_vmpabus_acc>; +defm : T_WWR_pat <V6_vmpahb_acc, int_hexagon_V6_vmpahb_acc>; +defm : T_WWR_pat <V6_vdsaduh_acc, int_hexagon_V6_vdsaduh_acc>; + +defm : T_VVV_pat <V6_vdmpyhvsat_acc, int_hexagon_V6_vdmpyhvsat_acc>; +defm : T_WVV_pat <V6_vmpybusv_acc, int_hexagon_V6_vmpybusv_acc>; +defm : T_WVV_pat <V6_vmpybv_acc, int_hexagon_V6_vmpybv_acc>; +defm : T_WVV_pat <V6_vmpyhus_acc, int_hexagon_V6_vmpyhus_acc>; +defm : T_WVV_pat <V6_vmpyhv_acc, int_hexagon_V6_vmpyhv_acc>; +defm : T_VVV_pat <V6_vmpyiewh_acc, int_hexagon_V6_vmpyiewh_acc>; +defm : T_VVV_pat <V6_vmpyiewuh_acc, int_hexagon_V6_vmpyiewuh_acc>; +defm : T_VVV_pat <V6_vmpyih_acc, int_hexagon_V6_vmpyih_acc>; +defm : T_VVV_pat <V6_vmpyowh_rnd_sacc, int_hexagon_V6_vmpyowh_rnd_sacc>; +defm : T_VVV_pat <V6_vmpyowh_sacc, int_hexagon_V6_vmpyowh_sacc>; +defm : T_WVV_pat <V6_vmpyubv_acc, int_hexagon_V6_vmpyubv_acc>; +defm : T_WVV_pat <V6_vmpyuhv_acc, int_hexagon_V6_vmpyuhv_acc>; +defm : T_VVV_pat <V6_vrmpybusv_acc, int_hexagon_V6_vrmpybusv_acc>; +defm : T_VVV_pat <V6_vrmpybv_acc, int_hexagon_V6_vrmpybv_acc>; +defm : T_VVV_pat <V6_vrmpyubv_acc, int_hexagon_V6_vrmpyubv_acc>; + +// Compare instructions +defm : T_QVV_pat <V6_veqb_and, int_hexagon_V6_veqb_and>; +defm : T_QVV_pat <V6_veqh_and, int_hexagon_V6_veqh_and>; +defm : T_QVV_pat <V6_veqw_and, int_hexagon_V6_veqw_and>; +defm : T_QVV_pat <V6_vgtb_and, int_hexagon_V6_vgtb_and>; +defm : T_QVV_pat <V6_vgth_and, int_hexagon_V6_vgth_and>; +defm : T_QVV_pat <V6_vgtw_and, int_hexagon_V6_vgtw_and>; +defm : T_QVV_pat <V6_vgtub_and, int_hexagon_V6_vgtub_and>; +defm : T_QVV_pat <V6_vgtuh_and, int_hexagon_V6_vgtuh_and>; +defm : T_QVV_pat <V6_vgtuw_and, int_hexagon_V6_vgtuw_and>; +defm : T_QVV_pat <V6_veqb_or, int_hexagon_V6_veqb_or>; +defm : T_QVV_pat <V6_veqh_or, int_hexagon_V6_veqh_or>; +defm : T_QVV_pat 
<V6_veqw_or, int_hexagon_V6_veqw_or>; +defm : T_QVV_pat <V6_vgtb_or, int_hexagon_V6_vgtb_or>; +defm : T_QVV_pat <V6_vgth_or, int_hexagon_V6_vgth_or>; +defm : T_QVV_pat <V6_vgtw_or, int_hexagon_V6_vgtw_or>; +defm : T_QVV_pat <V6_vgtub_or, int_hexagon_V6_vgtub_or>; +defm : T_QVV_pat <V6_vgtuh_or, int_hexagon_V6_vgtuh_or>; +defm : T_QVV_pat <V6_vgtuw_or, int_hexagon_V6_vgtuw_or>; +defm : T_QVV_pat <V6_veqb_xor, int_hexagon_V6_veqb_xor>; +defm : T_QVV_pat <V6_veqh_xor, int_hexagon_V6_veqh_xor>; +defm : T_QVV_pat <V6_veqw_xor, int_hexagon_V6_veqw_xor>; +defm : T_QVV_pat <V6_vgtb_xor, int_hexagon_V6_vgtb_xor>; +defm : T_QVV_pat <V6_vgth_xor, int_hexagon_V6_vgth_xor>; +defm : T_QVV_pat <V6_vgtw_xor, int_hexagon_V6_vgtw_xor>; +defm : T_QVV_pat <V6_vgtub_xor, int_hexagon_V6_vgtub_xor>; +defm : T_QVV_pat <V6_vgtuh_xor, int_hexagon_V6_vgtuh_xor>; +defm : T_QVV_pat <V6_vgtuw_xor, int_hexagon_V6_vgtuw_xor>; + +defm : T_VV_pat <V6_vminub, int_hexagon_V6_vminub>; +defm : T_VV_pat <V6_vminuh, int_hexagon_V6_vminuh>; +defm : T_VV_pat <V6_vminh, int_hexagon_V6_vminh>; +defm : T_VV_pat <V6_vminw, int_hexagon_V6_vminw>; +defm : T_VV_pat <V6_vmaxub, int_hexagon_V6_vmaxub>; +defm : T_VV_pat <V6_vmaxuh, int_hexagon_V6_vmaxuh>; +defm : T_VV_pat <V6_vmaxh, int_hexagon_V6_vmaxh>; +defm : T_VV_pat <V6_vmaxw, int_hexagon_V6_vmaxw>; +defm : T_VV_pat <V6_vdelta, int_hexagon_V6_vdelta>; +defm : T_VV_pat <V6_vrdelta, int_hexagon_V6_vrdelta>; +defm : T_VV_pat <V6_vdealb4w, int_hexagon_V6_vdealb4w>; +defm : T_VV_pat <V6_vmpyowh_rnd, int_hexagon_V6_vmpyowh_rnd>; +defm : T_VV_pat <V6_vshuffeb, int_hexagon_V6_vshuffeb>; +defm : T_VV_pat <V6_vshuffob, int_hexagon_V6_vshuffob>; +defm : T_VV_pat <V6_vshufeh, int_hexagon_V6_vshufeh>; +defm : T_VV_pat <V6_vshufoh, int_hexagon_V6_vshufoh>; +defm : T_VV_pat <V6_vshufoeh, int_hexagon_V6_vshufoeh>; +defm : T_VV_pat <V6_vshufoeb, int_hexagon_V6_vshufoeb>; +defm : T_VV_pat <V6_vcombine, int_hexagon_V6_vcombine>; +defm : T_VV_pat <V6_vmpyieoh, int_hexagon_V6_vmpyieoh>; +defm : T_VV_pat <V6_vsathub, int_hexagon_V6_vsathub>; +defm : T_VV_pat <V6_vsatwh, int_hexagon_V6_vsatwh>; +defm : T_VV_pat <V6_vroundwh, int_hexagon_V6_vroundwh>; +defm : T_VV_pat <V6_vroundwuh, int_hexagon_V6_vroundwuh>; +defm : T_VV_pat <V6_vroundhb, int_hexagon_V6_vroundhb>; +defm : T_VV_pat <V6_vroundhub, int_hexagon_V6_vroundhub>; +defm : T_VV_pat <V6_vasrwv, int_hexagon_V6_vasrwv>; +defm : T_VV_pat <V6_vlsrwv, int_hexagon_V6_vlsrwv>; +defm : T_VV_pat <V6_vlsrhv, int_hexagon_V6_vlsrhv>; +defm : T_VV_pat <V6_vasrhv, int_hexagon_V6_vasrhv>; +defm : T_VV_pat <V6_vaslwv, int_hexagon_V6_vaslwv>; +defm : T_VV_pat <V6_vaslhv, int_hexagon_V6_vaslhv>; +defm : T_VV_pat <V6_vaddb, int_hexagon_V6_vaddb>; +defm : T_VV_pat <V6_vaddh, int_hexagon_V6_vaddh>; +defm : T_VV_pat <V6_vmpyiewuh, int_hexagon_V6_vmpyiewuh>; +defm : T_VV_pat <V6_vmpyiowh, int_hexagon_V6_vmpyiowh>; +defm : T_VV_pat <V6_vpackeb, int_hexagon_V6_vpackeb>; +defm : T_VV_pat <V6_vpackeh, int_hexagon_V6_vpackeh>; +defm : T_VV_pat <V6_vpackhub_sat, int_hexagon_V6_vpackhub_sat>; +defm : T_VV_pat <V6_vpackhb_sat, int_hexagon_V6_vpackhb_sat>; +defm : T_VV_pat <V6_vpackwuh_sat, int_hexagon_V6_vpackwuh_sat>; +defm : T_VV_pat <V6_vpackwh_sat, int_hexagon_V6_vpackwh_sat>; +defm : T_VV_pat <V6_vpackob, int_hexagon_V6_vpackob>; +defm : T_VV_pat <V6_vpackoh, int_hexagon_V6_vpackoh>; +defm : T_VV_pat <V6_vmpyewuh, int_hexagon_V6_vmpyewuh>; +defm : T_VV_pat <V6_vmpyowh, int_hexagon_V6_vmpyowh>; + +defm : T_QVV_pat <V6_vaddbq, int_hexagon_V6_vaddbq>; +defm : T_QVV_pat 
<V6_vaddhq, int_hexagon_V6_vaddhq>; +defm : T_QVV_pat <V6_vaddwq, int_hexagon_V6_vaddwq>; +defm : T_QVV_pat <V6_vaddbnq, int_hexagon_V6_vaddbnq>; +defm : T_QVV_pat <V6_vaddhnq, int_hexagon_V6_vaddhnq>; +defm : T_QVV_pat <V6_vaddwnq, int_hexagon_V6_vaddwnq>; +defm : T_QVV_pat <V6_vsubbq, int_hexagon_V6_vsubbq>; +defm : T_QVV_pat <V6_vsubhq, int_hexagon_V6_vsubhq>; +defm : T_QVV_pat <V6_vsubwq, int_hexagon_V6_vsubwq>; +defm : T_QVV_pat <V6_vsubbnq, int_hexagon_V6_vsubbnq>; +defm : T_QVV_pat <V6_vsubhnq, int_hexagon_V6_vsubhnq>; +defm : T_QVV_pat <V6_vsubwnq, int_hexagon_V6_vsubwnq>; + +defm : T_V_pat <V6_vabsh, int_hexagon_V6_vabsh>; +defm : T_V_pat <V6_vabsw, int_hexagon_V6_vabsw>; +defm : T_V_pat <V6_vabsw_sat, int_hexagon_V6_vabsw_sat>; +defm : T_V_pat <V6_vabsh_sat, int_hexagon_V6_vabsh_sat>; +defm : T_V_pat <V6_vnot, int_hexagon_V6_vnot>; +defm : T_V_pat <V6_vassign, int_hexagon_V6_vassign>; +defm : T_V_pat <V6_vzb, int_hexagon_V6_vzb>; +defm : T_V_pat <V6_vzh, int_hexagon_V6_vzh>; +defm : T_V_pat <V6_vsb, int_hexagon_V6_vsb>; +defm : T_V_pat <V6_vsh, int_hexagon_V6_vsh>; +defm : T_V_pat <V6_vdealh, int_hexagon_V6_vdealh>; +defm : T_V_pat <V6_vdealb, int_hexagon_V6_vdealb>; +defm : T_V_pat <V6_vunpackub, int_hexagon_V6_vunpackub>; +defm : T_V_pat <V6_vunpackuh, int_hexagon_V6_vunpackuh>; +defm : T_V_pat <V6_vunpackb, int_hexagon_V6_vunpackb>; +defm : T_V_pat <V6_vunpackh, int_hexagon_V6_vunpackh>; +defm : T_V_pat <V6_vshuffh, int_hexagon_V6_vshuffh>; +defm : T_V_pat <V6_vshuffb, int_hexagon_V6_vshuffb>; +defm : T_V_pat <V6_vcl0w, int_hexagon_V6_vcl0w>; +defm : T_V_pat <V6_vpopcounth, int_hexagon_V6_vpopcounth>; +defm : T_V_pat <V6_vcl0h, int_hexagon_V6_vcl0h>; +defm : T_V_pat <V6_vnormamtw, int_hexagon_V6_vnormamtw>; +defm : T_V_pat <V6_vnormamth, int_hexagon_V6_vnormamth>; + +defm : T_WRI_pat <V6_vrmpybusi, int_hexagon_V6_vrmpybusi>; +defm : T_WRI_pat <V6_vrsadubi, int_hexagon_V6_vrsadubi>; +defm : T_WRI_pat <V6_vrmpyubi, int_hexagon_V6_vrmpyubi>; + +defm : T_WWRI_pat <V6_vrmpybusi_acc, int_hexagon_V6_vrmpybusi_acc>; +defm : T_WWRI_pat <V6_vrsadubi_acc, int_hexagon_V6_vrsadubi_acc>; +defm : T_WWRI_pat <V6_vrmpyubi_acc, int_hexagon_V6_vrmpyubi_acc>; + +// assembler mapped. +//defm : T_V_pat <V6_vtran2x2, int_hexagon_V6_vtran2x2>; +// not present earlier.. 
need to add intrinsic +defm : T_VVR_pat <V6_valignb, int_hexagon_V6_valignb>; +defm : T_VVR_pat <V6_vlalignb, int_hexagon_V6_vlalignb>; +defm : T_VVR_pat <V6_vasrwh, int_hexagon_V6_vasrwh>; +defm : T_VVR_pat <V6_vasrwhsat, int_hexagon_V6_vasrwhsat>; +defm : T_VVR_pat <V6_vasrwhrndsat, int_hexagon_V6_vasrwhrndsat>; +defm : T_VVR_pat <V6_vasrwuhsat, int_hexagon_V6_vasrwuhsat>; +defm : T_VVR_pat <V6_vasrhubsat, int_hexagon_V6_vasrhubsat>; +defm : T_VVR_pat <V6_vasrhubrndsat, int_hexagon_V6_vasrhubrndsat>; +defm : T_VVR_pat <V6_vasrhbrndsat, int_hexagon_V6_vasrhbrndsat>; + +defm : T_VVR_pat <V6_vshuffvdd, int_hexagon_V6_vshuffvdd>; +defm : T_VVR_pat <V6_vdealvdd, int_hexagon_V6_vdealvdd>; + +defm : T_WV_pat <V6_vunpackob, int_hexagon_V6_vunpackob>; +defm : T_WV_pat <V6_vunpackoh, int_hexagon_V6_vunpackoh>; +defm : T_VVI_pat <V6_valignbi, int_hexagon_V6_valignbi>; +defm : T_VVI_pat <V6_vlalignbi, int_hexagon_V6_vlalignbi>; + +defm : T_QVV_pat <V6_vswap, int_hexagon_V6_vswap>; +defm : T_QVV_pat <V6_vmux, int_hexagon_V6_vmux>; +defm : T_QQ_pat <V6_pred_and, int_hexagon_V6_pred_and>; +defm : T_QQ_pat <V6_pred_or, int_hexagon_V6_pred_or>; +defm : T_Q_pat <V6_pred_not, int_hexagon_V6_pred_not>; +defm : T_QQ_pat <V6_pred_xor, int_hexagon_V6_pred_xor>; +defm : T_QQ_pat <V6_pred_or_n, int_hexagon_V6_pred_or_n>; +defm : T_QQ_pat <V6_pred_and_n, int_hexagon_V6_pred_and_n>; +defm : T_VV_pat <V6_veqb, int_hexagon_V6_veqb>; +defm : T_VV_pat <V6_veqh, int_hexagon_V6_veqh>; +defm : T_VV_pat <V6_veqw, int_hexagon_V6_veqw>; +defm : T_VV_pat <V6_vgtb, int_hexagon_V6_vgtb>; +defm : T_VV_pat <V6_vgth, int_hexagon_V6_vgth>; +defm : T_VV_pat <V6_vgtw, int_hexagon_V6_vgtw>; +defm : T_VV_pat <V6_vgtub, int_hexagon_V6_vgtub>; +defm : T_VV_pat <V6_vgtuh, int_hexagon_V6_vgtuh>; +defm : T_VV_pat <V6_vgtuw, int_hexagon_V6_vgtuw>; + +defm : T_VQR_pat <V6_vandqrt_acc, int_hexagon_V6_vandqrt_acc>; +defm : T_QVR_pat <V6_vandvrt_acc, int_hexagon_V6_vandvrt_acc>; +defm : T_QR_pat <V6_vandqrt, int_hexagon_V6_vandqrt>; +defm : T_R_pat <V6_lvsplatw, int_hexagon_V6_lvsplatw>; +defm : T_R_pat <V6_pred_scalar2, int_hexagon_V6_pred_scalar2>; +defm : T_VR_pat <V6_vandvrt, int_hexagon_V6_vandvrt>; + +defm : T_VVR_pat <V6_vlutvvb, int_hexagon_V6_vlutvvb>; +defm : T_VVR_pat <V6_vlutvwh, int_hexagon_V6_vlutvwh>; +defm : T_VVVR_pat <V6_vlutvvb_oracc, int_hexagon_V6_vlutvvb_oracc>; +defm : T_WVVR_pat <V6_vlutvwh_oracc, int_hexagon_V6_vlutvwh_oracc>; + +defm : T_QVR_pat <V6_vandvrt_acc, int_hexagon_V6_vandvrt_acc>; +def : T_PI_pat <S6_rol_i_p, int_hexagon_S6_rol_i_p>; +def : T_RI_pat <S6_rol_i_r, int_hexagon_S6_rol_i_r>; +def : T_PPI_pat <S6_rol_i_p_nac, int_hexagon_S6_rol_i_p_nac>; +def : T_PPI_pat <S6_rol_i_p_acc, int_hexagon_S6_rol_i_p_acc>; +def : T_PPI_pat <S6_rol_i_p_and, int_hexagon_S6_rol_i_p_and>; +def : T_PPI_pat <S6_rol_i_p_or, int_hexagon_S6_rol_i_p_or>; +def : T_PPI_pat <S6_rol_i_p_xacc, int_hexagon_S6_rol_i_p_xacc>; +def : T_RRI_pat <S6_rol_i_r_nac, int_hexagon_S6_rol_i_r_nac>; +def : T_RRI_pat <S6_rol_i_r_acc, int_hexagon_S6_rol_i_r_acc>; +def : T_RRI_pat <S6_rol_i_r_and, int_hexagon_S6_rol_i_r_and>; +def : T_RRI_pat <S6_rol_i_r_or, int_hexagon_S6_rol_i_r_or>; +def : T_RRI_pat <S6_rol_i_r_xacc, int_hexagon_S6_rol_i_r_xacc>; + +defm : T_VR_pat <V6_extractw, int_hexagon_V6_extractw>; +defm : T_VR_pat <V6_vinsertwr, int_hexagon_V6_vinsertwr>; + +def : T_PPQ_pat <S2_cabacencbin, int_hexagon_S2_cabacencbin>; + +def: Pat<(v64i16 (trunc v64i32:$Vdd)), + (v64i16 (V6_vpackwh_sat_128B + (v32i32 (HEXAGON_V6_hi_128B VecDblRegs128B:$Vdd)), + 
(v32i32 (HEXAGON_V6_lo_128B VecDblRegs128B:$Vdd))))>, + Requires<[UseHVXDbl]>; + + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp index 75189b696ea2..624c0f6cf49d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -26,39 +26,71 @@ using namespace llvm; -static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol, - HexagonAsmPrinter& Printer) { +namespace llvm { + void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP); +} + +static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, + HexagonAsmPrinter &Printer) { MCContext &MC = Printer.OutContext; const MCExpr *ME; - ME = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, MC); + // Populate the relocation type based on Hexagon target flags + // set on an operand + MCSymbolRefExpr::VariantKind RelocationType; + switch (MO.getTargetFlags()) { + default: + RelocationType = MCSymbolRefExpr::VK_None; + break; + case HexagonII::MO_PCREL: + RelocationType = MCSymbolRefExpr::VK_Hexagon_PCREL; + break; + case HexagonII::MO_GOT: + RelocationType = MCSymbolRefExpr::VK_GOT; + break; + case HexagonII::MO_LO16: + RelocationType = MCSymbolRefExpr::VK_Hexagon_LO16; + break; + case HexagonII::MO_HI16: + RelocationType = MCSymbolRefExpr::VK_Hexagon_HI16; + break; + case HexagonII::MO_GPREL: + RelocationType = MCSymbolRefExpr::VK_Hexagon_GPREL; + break; + } + + ME = MCSymbolRefExpr::create(Symbol, RelocationType, MC); if (!MO.isJTI() && MO.getOffset()) ME = MCBinaryExpr::createAdd(ME, MCConstantExpr::create(MO.getOffset(), MC), MC); - return (MCOperand::createExpr(ME)); + return MCOperand::createExpr(ME); } // Create an MCInst from a MachineInstr -void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB, - HexagonAsmPrinter& AP) { - if(MI->getOpcode() == Hexagon::ENDLOOP0){ +void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP) { + if (MI->getOpcode() == Hexagon::ENDLOOP0) { HexagonMCInstrInfo::setInnerLoop(MCB); return; } - if(MI->getOpcode() == Hexagon::ENDLOOP1){ + if (MI->getOpcode() == Hexagon::ENDLOOP1) { HexagonMCInstrInfo::setOuterLoop(MCB); return; } - MCInst* MCI = new (AP.OutContext) MCInst; + MCInst *MCI = new (AP.OutContext) MCInst; MCI->setOpcode(MI->getOpcode()); assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) && "MCI opcode should have been set on construction"); + bool MustExtend = false; for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { const MachineOperand &MO = MI->getOperand(i); MCOperand MCO; + if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) + MustExtend = true; switch (MO.getType()) { default: @@ -73,11 +105,14 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB, APFloat Val = MO.getFPImm()->getValueAPF(); // FP immediates are used only when setting GPRs, so they may be dealt // with like regular immediates from this point on. 
- MCO = MCOperand::createImm(*Val.bitcastToAPInt().getRawData()); + MCO = MCOperand::createExpr( + MCConstantExpr::create(*Val.bitcastToAPInt().getRawData(), + AP.OutContext)); break; } case MachineOperand::MO_Immediate: - MCO = MCOperand::createImm(MO.getImm()); + MCO = MCOperand::createExpr( + MCConstantExpr::create(MO.getImm(), AP.OutContext)); break; case MachineOperand::MO_MachineBasicBlock: MCO = MCOperand::createExpr @@ -104,5 +139,8 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB, MCI->addOperand(MCO); } + AP.HexagonProcessInstruction(*MCI, *MI); + HexagonMCInstrInfo::extendIfNeeded(AP.OutContext, MCII, MCB, *MCI, + MustExtend); MCB.addOperand(MCOperand::createInst(MCI)); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 35f732cd6207..7a52d6874c33 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -179,7 +179,11 @@ void VLIWMachineScheduler::schedule() { initQueues(TopRoots, BotRoots); bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + if (!checkSchedLimit()) break; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp index 707bfdbb6ab6..20c4ab112b5f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -92,6 +92,7 @@ namespace { /// \brief A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; + bool isNewValueJumpCandidate(const MachineInstr *MI) const; }; } // end of anonymous namespace @@ -280,9 +281,9 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, return true; } -// Given a compare operator, return a matching New Value Jump -// compare operator. Make sure that MI here is included in -// HexagonInstrInfo.cpp::isNewValueJumpCandidate + +// Given a compare operator, return a matching New Value Jump compare operator. +// Make sure that MI here is included in isNewValueJumpCandidate. static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, bool secondRegNewified, MachineBasicBlock *jmpTarget, @@ -341,6 +342,24 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t : Hexagon::J4_cmpgtui_t_jumpnv_nt; + case Hexagon::C4_cmpneq: + return taken ? Hexagon::J4_cmpeq_f_jumpnv_t + : Hexagon::J4_cmpeq_f_jumpnv_nt; + + case Hexagon::C4_cmplte: + if (secondRegNewified) + return taken ? Hexagon::J4_cmplt_f_jumpnv_t + : Hexagon::J4_cmplt_f_jumpnv_nt; + return taken ? Hexagon::J4_cmpgt_f_jumpnv_t + : Hexagon::J4_cmpgt_f_jumpnv_nt; + + case Hexagon::C4_cmplteu: + if (secondRegNewified) + return taken ? Hexagon::J4_cmpltu_f_jumpnv_t + : Hexagon::J4_cmpltu_f_jumpnv_nt; + return taken ? 
Hexagon::J4_cmpgtu_f_jumpnv_t + : Hexagon::J4_cmpgtu_f_jumpnv_nt; + default: llvm_unreachable("Could not find matching New Value Jump instruction."); } @@ -348,6 +367,26 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, return 0; } +bool HexagonNewValueJump::isNewValueJumpCandidate(const MachineInstr *MI) + const { + switch (MI->getOpcode()) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtui: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + return true; + + default: + return false; + } +} + + bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" @@ -372,7 +411,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { // Loop through all the bb's of the function for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n"); @@ -468,7 +507,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { MI->getOperand(0).getReg() == predReg) { // Not all compares can be new value compare. Arch Spec: 7.6.1.1 - if (QII->isNewValueJumpCandidate(MI)) { + if (isNewValueJumpCandidate(MI)) { assert((MI->getDesc().isCompare()) && "Only compare instruction can be collapsed into New Value Jump"); @@ -591,8 +630,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DebugLoc dl = MI->getDebugLoc(); MachineInstr *NewMI; - assert((QII->isNewValueJumpCandidate(cmpInstr)) && - "This compare is not a New Value Jump candidate."); + assert((isNewValueJumpCandidate(cmpInstr)) && + "This compare is not a New Value Jump candidate."); unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2, isSecondOpNewified, jmpTarget, MBPI); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td index 2bece8f42f53..fbd29cd4d6d1 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td @@ -1,4 +1,4 @@ -//===- HexagonOperands.td - Hexagon immediate processing -*- tablegen -*-===// +//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,59 +7,114 @@ // //===----------------------------------------------------------------------===// +def s32ImmOperand : AsmOperandClass { let Name = "s32Imm"; } +def s8ImmOperand : AsmOperandClass { let Name = "s8Imm"; } +def s8Imm64Operand : AsmOperandClass { let Name = "s8Imm64"; } +def s6ImmOperand : AsmOperandClass { let Name = "s6Imm"; } +def s4ImmOperand : AsmOperandClass { let Name = "s4Imm"; } def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; } def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; } def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; } def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; } - +def s4_6ImmOperand : AsmOperandClass { let Name = "s4_6Imm"; } +def s3_6ImmOperand : AsmOperandClass { let Name = "s3_6Imm"; } +def u64ImmOperand : AsmOperandClass { let Name = "u64Imm"; } +def u32ImmOperand : AsmOperandClass { let Name = "u32Imm"; } +def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; } +def u16ImmOperand : AsmOperandClass { let Name = "u16Imm"; } +def u16_0ImmOperand : AsmOperandClass 
{ let Name = "u16_0Imm"; } +def u16_1ImmOperand : AsmOperandClass { let Name = "u16_1Imm"; } +def u16_2ImmOperand : AsmOperandClass { let Name = "u16_2Imm"; } +def u16_3ImmOperand : AsmOperandClass { let Name = "u16_3Imm"; } +def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; } +def u10ImmOperand : AsmOperandClass { let Name = "u10Imm"; } +def u9ImmOperand : AsmOperandClass { let Name = "u9Imm"; } +def u8ImmOperand : AsmOperandClass { let Name = "u8Imm"; } +def u7ImmOperand : AsmOperandClass { let Name = "u7Imm"; } +def u6ImmOperand : AsmOperandClass { let Name = "u6Imm"; } +def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; } +def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; } +def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; } +def u6_3ImmOperand : AsmOperandClass { let Name = "u6_3Imm"; } +def u5ImmOperand : AsmOperandClass { let Name = "u5Imm"; } +def u4ImmOperand : AsmOperandClass { let Name = "u4Imm"; } +def u3ImmOperand : AsmOperandClass { let Name = "u3Imm"; } +def u2ImmOperand : AsmOperandClass { let Name = "u2Imm"; } +def u1ImmOperand : AsmOperandClass { let Name = "u1Imm"; } +def n8ImmOperand : AsmOperandClass { let Name = "n8Imm"; } // Immediate operands. -let PrintMethod = "printImmOperand" in { - def s32Imm : Operand<i32>; - def s8Imm : Operand<i32>; - def s8Imm64 : Operand<i64>; - def s6Imm : Operand<i32>; +let OperandType = "OPERAND_IMMEDIATE", + DecoderMethod = "unsignedImmDecoder" in { + def s32Imm : Operand<i32> { let ParserMatchClass = s32ImmOperand; + let DecoderMethod = "s32ImmDecoder"; } + def s8Imm : Operand<i32> { let ParserMatchClass = s8ImmOperand; + let DecoderMethod = "s8ImmDecoder"; } + def s8Imm64 : Operand<i64> { let ParserMatchClass = s8Imm64Operand; + let DecoderMethod = "s8ImmDecoder"; } + def s6Imm : Operand<i32> { let ParserMatchClass = s6ImmOperand; + let DecoderMethod = "s6_0ImmDecoder"; } def s6_3Imm : Operand<i32>; - def s4Imm : Operand<i32>; - def s4_0Imm : Operand<i32> { let DecoderMethod = "s4_0ImmDecoder"; } - def s4_1Imm : Operand<i32> { let DecoderMethod = "s4_1ImmDecoder"; } - def s4_2Imm : Operand<i32> { let DecoderMethod = "s4_2ImmDecoder"; } - def s4_3Imm : Operand<i32> { let DecoderMethod = "s4_3ImmDecoder"; } - def u64Imm : Operand<i64>; - def u32Imm : Operand<i32>; - def u26_6Imm : Operand<i32>; - def u16Imm : Operand<i32>; - def u16_0Imm : Operand<i32>; - def u16_1Imm : Operand<i32>; - def u16_2Imm : Operand<i32>; - def u16_3Imm : Operand<i32>; - def u11_3Imm : Operand<i32>; - def u10Imm : Operand<i32>; - def u9Imm : Operand<i32>; - def u8Imm : Operand<i32>; - def u7Imm : Operand<i32>; - def u6Imm : Operand<i32>; - def u6_0Imm : Operand<i32>; - def u6_1Imm : Operand<i32>; - def u6_2Imm : Operand<i32>; - def u6_3Imm : Operand<i32>; - def u5Imm : Operand<i32>; + def s4Imm : Operand<i32> { let ParserMatchClass = s4ImmOperand; + let DecoderMethod = "s4_0ImmDecoder"; } + def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; + let DecoderMethod = "s4_0ImmDecoder"; } + def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; + let DecoderMethod = "s4_1ImmDecoder"; } + def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; + let DecoderMethod = "s4_2ImmDecoder"; } + def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; + let DecoderMethod = "s4_3ImmDecoder"; } + def u64Imm : Operand<i64> { let ParserMatchClass = u64ImmOperand; } + def u32Imm : Operand<i32> { let ParserMatchClass = u32ImmOperand; } + def u26_6Imm : Operand<i32> { let 
ParserMatchClass = u26_6ImmOperand; } + def u16Imm : Operand<i32> { let ParserMatchClass = u16ImmOperand; } + def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; } + def u16_1Imm : Operand<i32> { let ParserMatchClass = u16_1ImmOperand; } + def u16_2Imm : Operand<i32> { let ParserMatchClass = u16_2ImmOperand; } + def u16_3Imm : Operand<i32> { let ParserMatchClass = u16_3ImmOperand; } + def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; } + def u10Imm : Operand<i32> { let ParserMatchClass = u10ImmOperand; } + def u9Imm : Operand<i32> { let ParserMatchClass = u9ImmOperand; } + def u8Imm : Operand<i32> { let ParserMatchClass = u8ImmOperand; } + def u7Imm : Operand<i32> { let ParserMatchClass = u7ImmOperand; } + def u6Imm : Operand<i32> { let ParserMatchClass = u6ImmOperand; } + def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; } + def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; } + def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; } + def u6_3Imm : Operand<i32> { let ParserMatchClass = u6_3ImmOperand; } + def u5Imm : Operand<i32> { let ParserMatchClass = u5ImmOperand; } + def u5_0Imm : Operand<i32>; + def u5_1Imm : Operand<i32>; def u5_2Imm : Operand<i32>; def u5_3Imm : Operand<i32>; - def u4Imm : Operand<i32>; + def u4Imm : Operand<i32> { let ParserMatchClass = u4ImmOperand; } def u4_0Imm : Operand<i32>; + def u4_1Imm : Operand<i32>; def u4_2Imm : Operand<i32>; - def u3Imm : Operand<i32>; + def u4_3Imm : Operand<i32>; + def u3Imm : Operand<i32> { let ParserMatchClass = u3ImmOperand; } def u3_0Imm : Operand<i32>; def u3_1Imm : Operand<i32>; - def u2Imm : Operand<i32>; - def u1Imm : Operand<i32>; - def n8Imm : Operand<i32>; - def m6Imm : Operand<i32>; + def u3_2Imm : Operand<i32>; + def u3_3Imm : Operand<i32>; + def u2Imm : Operand<i32> { let ParserMatchClass = u2ImmOperand; } + def u1Imm : Operand<i32> { let ParserMatchClass = u1ImmOperand; } + def n8Imm : Operand<i32> { let ParserMatchClass = n8ImmOperand; } } -let PrintMethod = "printNOneImmOperand" in -def nOneImm : Operand<i32>; +let OperandType = "OPERAND_IMMEDIATE" in { + def s4_6Imm : Operand<i32> { let ParserMatchClass = s4_6ImmOperand; + let PrintMethod = "prints4_6ImmOperand"; + let DecoderMethod = "s4_6ImmDecoder";} + def s4_7Imm : Operand<i32> { let PrintMethod = "prints4_7ImmOperand"; + let DecoderMethod = "s4_6ImmDecoder";} + def s3_6Imm : Operand<i32> { let ParserMatchClass = s3_6ImmOperand; + let PrintMethod = "prints3_6ImmOperand"; + let DecoderMethod = "s3_6ImmDecoder";} + def s3_7Imm : Operand<i32> { let PrintMethod = "prints3_7ImmOperand"; + let DecoderMethod = "s3_6ImmDecoder";} +} // // Immediate predicates @@ -81,32 +136,12 @@ def s31_1ImmPred : PatLeaf<(i32 imm), [{ def s30_2ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<31,1>(v); + return isShiftedInt<30,2>(v); }]>; def s29_3ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<31,1>(v); -}]>; - -def s22_10ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<22,10>(v); -}]>; - -def s8_24ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<8,24>(v); -}]>; - -def s16_16ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<16,16>(v); -}]>; - -def s26_6ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<26,6>(v); + return 
isShiftedInt<29,3>(v);
 }]>;
 
 def s16ImmPred : PatLeaf<(i32 imm), [{
@@ -114,16 +149,6 @@ def s16ImmPred : PatLeaf<(i32 imm), [{
   return isInt<16>(v);
 }]>;
 
-def s13ImmPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  return isInt<13>(v);
-}]>;
-
-def s12ImmPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  return isInt<12>(v);
-}]>;
-
 def s11_0ImmPred : PatLeaf<(i32 imm), [{
   int64_t v = (int64_t)N->getSExtValue();
   return isInt<11>(v);
@@ -149,16 +174,6 @@ def s10ImmPred : PatLeaf<(i32 imm), [{
   return isInt<10>(v);
 }]>;
 
-def s9ImmPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  return isInt<9>(v);
-}]>;
-
-def m9ImmPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  return isInt<9>(v) && (v != -256);
-}]>;
-
 def s8ImmPred : PatLeaf<(i32 imm), [{
   int64_t v = (int64_t)N->getSExtValue();
   return isInt<8>(v);
@@ -194,7 +209,6 @@ def s4_3ImmPred : PatLeaf<(i32 imm), [{
   return isShiftedInt<4,3>(v);
 }]>;
 
-
 def u64ImmPred : PatLeaf<(i64 imm), [{
   // Adding "N ||" to suppress gcc unused warning.
   return (N || true);
@@ -230,19 +244,19 @@ def u26_6ImmPred : PatLeaf<(i32 imm), [{
   return isShiftedUInt<26,6>(v);
 }]>;
 
-def u16ImmPred : PatLeaf<(i32 imm), [{
+def u16_0ImmPred : PatLeaf<(i32 imm), [{
   int64_t v = (int64_t)N->getSExtValue();
   return isUInt<16>(v);
 }]>;
 
-def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+def u16_1ImmPred : PatLeaf<(i32 imm), [{
   int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedUInt<16,8>(v);
+  return isShiftedUInt<16,1>(v);
 }]>;
 
-def u16_0ImmPred : PatLeaf<(i32 imm), [{
+def u16_2ImmPred : PatLeaf<(i32 imm), [{
   int64_t v = (int64_t)N->getSExtValue();
-  return isUInt<16>(v);
+  return isShiftedUInt<16,2>(v);
 }]>;
 
 def u11_3ImmPred : PatLeaf<(i32 imm), [{
@@ -250,6 +264,11 @@ def u11_3ImmPred : PatLeaf<(i32 imm), [{
   return isShiftedUInt<11,3>(v);
 }]>;
 
+def u10ImmPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<10>(v);
+}]>;
+
 def u9ImmPred : PatLeaf<(i32 imm), [{
   int64_t v = (int64_t)N->getSExtValue();
   return isUInt<9>(v);
@@ -321,6 +340,11 @@ def u1ImmPred : PatLeaf<(i1 imm), [{
   return isUInt<1>(v);
 }]>;
 
+def u1ImmPred32 : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<1>(v);
+}]>;
+
 def m5BImmPred : PatLeaf<(i32 imm), [{
   // m5BImmPred predicate - True if the (char) number is in range -1 .. -31
   // and will fit in a 5 bit field when made positive, for use in memops.
@@ -379,7 +403,7 @@ def Clr5ImmPred : PatLeaf<(i32 imm), [{
 }]>;
 
 def SetClr5ImmPred : PatLeaf<(i32 imm), [{
-  // SetClr5ImmPred predicate - True if the immediate is in range 0..31.
+  // True if the immediate is in range 0..31.
   int32_t v = (int32_t)N->getSExtValue();
   return (v >= 0 && v <= 31);
 }]>;
@@ -404,14 +428,13 @@ def Clr4ImmPred : PatLeaf<(i32 imm), [{
 }]>;
 
 def SetClr4ImmPred : PatLeaf<(i32 imm), [{
-  // SetClr4ImmPred predicate - True if the immediate is in the range 0..15.
+  // True if the immediate is in the range 0..15.
   int16_t v = (int16_t)N->getSExtValue();
   return (v >= 0 && v <= 15);
 }]>;
 
 def Set3ImmPred : PatLeaf<(i32 imm), [{
-  // Set3ImmPred predicate - True if the number is in the series of values:
-  // [ 2^0, 2^1, ... 2^7 ].
+  // True if the number is in the series of values: [ 2^0, 2^1, ... 2^7 ].
   // For use in setbit immediate.
   uint8_t v = (int8_t)N->getSExtValue();
   // Constrain to 8 bits, and then check for single bit.
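Note the two corrections earlier in this hunk: s30_2ImmPred now tests isShiftedInt<30,2> and s29_3ImmPred tests isShiftedInt<29,3>, where both previously tested isShiftedInt<31,1> and so accepted any 2-byte-aligned value instead of the intended 4- and 8-byte-aligned ranges. A standalone sketch of the isShiftedInt semantics from llvm/Support/MathExtras.h:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    int main() {
      // isShiftedInt<N, S>(v) holds when v is a signed N-bit value shifted
      // left by S, i.e. v fits in N+S bits and its low S bits are zero.
      assert(llvm::isShiftedInt<30, 2>(-4));  // 4-byte aligned, in range
      assert(!llvm::isShiftedInt<30, 2>(2));  // low two bits not clear
      assert(llvm::isShiftedInt<29, 3>(8));   // 8-byte aligned
      assert(!llvm::isShiftedInt<29, 3>(4));  // only 4-byte aligned
      return 0;
    }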
@@ -419,9 +442,7 @@ def Set3ImmPred : PatLeaf<(i32 imm), [{ }]>; def Clr3ImmPred : PatLeaf<(i32 imm), [{ - // Clr3ImmPred predicate - True if the number is in the series of - // bit negated values: - // [ 2^0, 2^1, ... 2^7 ]. + // True if the number is in the series of bit negated values: [ 2^0, 2^1, ... 2^7 ]. // For use in setbit and clrbit immediate. uint8_t v = ~ (int8_t)N->getSExtValue(); // Constrain to 8 bits, and then check for single bit. @@ -429,76 +450,109 @@ def Clr3ImmPred : PatLeaf<(i32 imm), [{ }]>; def SetClr3ImmPred : PatLeaf<(i32 imm), [{ - // SetClr3ImmPred predicate - True if the immediate is in the range 0..7. + // True if the immediate is in the range 0..7. int8_t v = (int8_t)N->getSExtValue(); return (v >= 0 && v <= 7); }]>; // Extendable immediate operands. - -let PrintMethod = "printExtOperand" in { - def f32Ext : Operand<f32>; - def s16Ext : Operand<i32> { let DecoderMethod = "s16ImmDecoder"; } - def s12Ext : Operand<i32> { let DecoderMethod = "s12ImmDecoder"; } - def s11_0Ext : Operand<i32> { let DecoderMethod = "s11_0ImmDecoder"; } - def s11_1Ext : Operand<i32> { let DecoderMethod = "s11_1ImmDecoder"; } - def s11_2Ext : Operand<i32> { let DecoderMethod = "s11_2ImmDecoder"; } - def s11_3Ext : Operand<i32> { let DecoderMethod = "s11_3ImmDecoder"; } - def s10Ext : Operand<i32> { let DecoderMethod = "s10ImmDecoder"; } - def s9Ext : Operand<i32> { let DecoderMethod = "s90ImmDecoder"; } - def s8Ext : Operand<i32> { let DecoderMethod = "s8ImmDecoder"; } - def s7Ext : Operand<i32>; - def s6Ext : Operand<i32> { let DecoderMethod = "s6_0ImmDecoder"; } - def u6Ext : Operand<i32>; - def u7Ext : Operand<i32>; - def u8Ext : Operand<i32>; - def u9Ext : Operand<i32>; - def u10Ext : Operand<i32>; - def u6_0Ext : Operand<i32>; - def u6_1Ext : Operand<i32>; - def u6_2Ext : Operand<i32>; - def u6_3Ext : Operand<i32>; +def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; } +def s16ExtOperand : AsmOperandClass { let Name = "s16Ext"; } +def s12ExtOperand : AsmOperandClass { let Name = "s12Ext"; } +def s10ExtOperand : AsmOperandClass { let Name = "s10Ext"; } +def s9ExtOperand : AsmOperandClass { let Name = "s9Ext"; } +def s8ExtOperand : AsmOperandClass { let Name = "s8Ext"; } +def s7ExtOperand : AsmOperandClass { let Name = "s7Ext"; } +def s6ExtOperand : AsmOperandClass { let Name = "s6Ext"; } +def s11_0ExtOperand : AsmOperandClass { let Name = "s11_0Ext"; } +def s11_1ExtOperand : AsmOperandClass { let Name = "s11_1Ext"; } +def s11_2ExtOperand : AsmOperandClass { let Name = "s11_2Ext"; } +def s11_3ExtOperand : AsmOperandClass { let Name = "s11_3Ext"; } +def u6ExtOperand : AsmOperandClass { let Name = "u6Ext"; } +def u7ExtOperand : AsmOperandClass { let Name = "u7Ext"; } +def u8ExtOperand : AsmOperandClass { let Name = "u8Ext"; } +def u9ExtOperand : AsmOperandClass { let Name = "u9Ext"; } +def u10ExtOperand : AsmOperandClass { let Name = "u10Ext"; } +def u6_0ExtOperand : AsmOperandClass { let Name = "u6_0Ext"; } +def u6_1ExtOperand : AsmOperandClass { let Name = "u6_1Ext"; } +def u6_2ExtOperand : AsmOperandClass { let Name = "u6_2Ext"; } +def u6_3ExtOperand : AsmOperandClass { let Name = "u6_3Ext"; } +def u32MustExtOperand : AsmOperandClass { let Name = "u32MustExt"; } + + + +let OperandType = "OPERAND_IMMEDIATE", PrintMethod = "printExtOperand", + DecoderMethod = "unsignedImmDecoder" in { + def f32Ext : Operand<f32> { let ParserMatchClass = f32ExtOperand; } + def s16Ext : Operand<i32> { let ParserMatchClass = s16ExtOperand; + let DecoderMethod = "s16ImmDecoder"; } + def s12Ext : 
Operand<i32> { let ParserMatchClass = s12ExtOperand; + let DecoderMethod = "s12ImmDecoder"; } + def s11_0Ext : Operand<i32> { let ParserMatchClass = s11_0ExtOperand; + let DecoderMethod = "s11_0ImmDecoder"; } + def s11_1Ext : Operand<i32> { let ParserMatchClass = s11_1ExtOperand; + let DecoderMethod = "s11_1ImmDecoder"; } + def s11_2Ext : Operand<i32> { let ParserMatchClass = s11_2ExtOperand; + let DecoderMethod = "s11_2ImmDecoder"; } + def s11_3Ext : Operand<i32> { let ParserMatchClass = s11_3ExtOperand; + let DecoderMethod = "s11_3ImmDecoder"; } + def s10Ext : Operand<i32> { let ParserMatchClass = s10ExtOperand; + let DecoderMethod = "s10ImmDecoder"; } + def s9Ext : Operand<i32> { let ParserMatchClass = s9ExtOperand; + let DecoderMethod = "s90ImmDecoder"; } + def s8Ext : Operand<i32> { let ParserMatchClass = s8ExtOperand; + let DecoderMethod = "s8ImmDecoder"; } + def s7Ext : Operand<i32> { let ParserMatchClass = s7ExtOperand; } + def s6Ext : Operand<i32> { let ParserMatchClass = s6ExtOperand; + let DecoderMethod = "s6_0ImmDecoder"; } + def u6Ext : Operand<i32> { let ParserMatchClass = u6ExtOperand; } + def u7Ext : Operand<i32> { let ParserMatchClass = u7ExtOperand; } + def u8Ext : Operand<i32> { let ParserMatchClass = u8ExtOperand; } + def u9Ext : Operand<i32> { let ParserMatchClass = u9ExtOperand; } + def u10Ext : Operand<i32> { let ParserMatchClass = u10ExtOperand; } + def u6_0Ext : Operand<i32> { let ParserMatchClass = u6_0ExtOperand; } + def u6_1Ext : Operand<i32> { let ParserMatchClass = u6_1ExtOperand; } + def u6_2Ext : Operand<i32> { let ParserMatchClass = u6_2ExtOperand; } + def u6_3Ext : Operand<i32> { let ParserMatchClass = u6_3ExtOperand; } + def u32MustExt : Operand<i32> { let ParserMatchClass = u32MustExtOperand; } } -def s10ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<10>(v)) - return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); +def s4_7ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 10-bit sign extended field and + // is 128-byte aligned. + return isShiftedInt<4,7>(v); + return false; }]>; -def s8ExtPred : PatLeaf<(i32 imm), [{ +def s3_7ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (isInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 9-bit sign extended field and + // is 128-byte aligned. + return isShiftedInt<3,7>(v); + return false; }]>; -def u8ExtPred : PatLeaf<(i32 imm), [{ +def s4_6ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 10-bit sign extended field and + // is 64-byte aligned. 
+ return isShiftedInt<4,6>(v); + return false; }]>; -def u9ExtPred : PatLeaf<(i32 imm), [{ +def s3_6ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<9>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 9-bit sign extended field and + // is 64-byte aligned. + return isShiftedInt<3,6>(v); + return false; }]>; @@ -523,21 +577,21 @@ let PrintMethod = "printGlobalOperand" in { let PrintMethod = "printJumpTable" in def jumptablebase : Operand<i32>; -def brtarget : Operand<OtherVT>; +def brtarget : Operand<OtherVT> { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; +} def brtargetExt : Operand<OtherVT> { - let PrintMethod = "printExtBrtarget"; + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; +} +def calltarget : Operand<i32> { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; } -def calltarget : Operand<i32>; def bblabel : Operand<i32>; -def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">; - -def symbolHi32 : Operand<i32> { - let PrintMethod = "printSymbolHi"; -} -def symbolLo32 : Operand<i32> { - let PrintMethod = "printSymbolLo"; -} +def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">; // Return true if for a 32 to 64-bit sign-extended load. def is_sext_i32 : PatLeaf<(i64 DoubleRegs:$src1), [{ diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp new file mode 100644 index 000000000000..1723771550c9 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp @@ -0,0 +1,150 @@ +//===- HexagonOptimizeSZextends.cpp - Remove unnecessary argument extends -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pass that removes sign extends for function parameters. 
These parameters
+// are already sign extended by the caller per Hexagon's ABI.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+
+#include "Hexagon.h"
+
+using namespace llvm;
+
+namespace llvm {
+  FunctionPass *createHexagonOptimizeSZextends();
+  void initializeHexagonOptimizeSZextendsPass(PassRegistry&);
+}
+
+namespace {
+  struct HexagonOptimizeSZextends : public FunctionPass {
+  public:
+    static char ID;
+    HexagonOptimizeSZextends() : FunctionPass(ID) {
+      initializeHexagonOptimizeSZextendsPass(*PassRegistry::getPassRegistry());
+    }
+    bool runOnFunction(Function &F) override;
+
+    const char *getPassName() const override {
+      return "Remove sign extends";
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<MachineFunctionAnalysis>();
+      AU.addPreserved<MachineFunctionAnalysis>();
+      AU.addPreserved<StackProtector>();
+      FunctionPass::getAnalysisUsage(AU);
+    }
+
+    bool intrinsicAlreadySextended(Intrinsic::ID IntID);
+  };
+}
+
+char HexagonOptimizeSZextends::ID = 0;
+
+INITIALIZE_PASS(HexagonOptimizeSZextends, "reargs",
+                "Remove Sign and Zero Extends for Args", false, false)
+
+bool HexagonOptimizeSZextends::intrinsicAlreadySextended(Intrinsic::ID IntID) {
+  switch(IntID) {
+    case llvm::Intrinsic::hexagon_A2_addh_l16_sat_ll:
+      return true;
+    default:
+      break;
+  }
+  return false;
+}
+
+bool HexagonOptimizeSZextends::runOnFunction(Function &F) {
+  unsigned Idx = 1;
+  // Try to optimize sign extends in formal parameters. It's relying on
+  // the caller already sign extending the values. I'm not sure if our ABI
+  // requires the caller to sign extend though.
+  for (auto &Arg : F.args()) {
+    if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
+      if (!isa<PointerType>(Arg.getType())) {
+        for (auto UI = Arg.use_begin(); UI != Arg.use_end();) {
+          if (isa<SExtInst>(*UI)) {
+            Instruction* Use = cast<Instruction>(*UI);
+            SExtInst* SI = new SExtInst(&Arg, Use->getType());
+            assert (EVT::getEVT(SI->getType()) ==
+                    (EVT::getEVT(Use->getType())));
+            ++UI;
+            Use->replaceAllUsesWith(SI);
+            Instruction* First = &F.getEntryBlock().front();
+            SI->insertBefore(First);
+            Use->eraseFromParent();
+          } else {
+            ++UI;
+          }
+        }
+      }
+    }
+    ++Idx;
+  }
+
+  // Try to remove redundant sext operations on Hexagon. The hardware
+  // already sign extends many 16 bit intrinsic operations to 32 bits.
+  // For example:
+  //   %34 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %x, i32 %y)
+  //   %sext233 = shl i32 %34, 16
+  //   %conv52 = ashr exact i32 %sext233, 16
+  for (auto &B : F) {
+    for (auto &I : B) {
+      // Look for arithmetic shift right by 16.
+      BinaryOperator *Ashr = dyn_cast<BinaryOperator>(&I);
+      if (!(Ashr && Ashr->getOpcode() == Instruction::AShr))
+        continue;
+      Value *AshrOp1 = Ashr->getOperand(1);
+      ConstantInt *C = dyn_cast<ConstantInt>(AshrOp1);
+      // Right shifted by 16.
+      if (!(C && C->getSExtValue() == 16))
+        continue;
+
+      // The first operand of Ashr comes from logical shift left.
+      Instruction *Shl = dyn_cast<Instruction>(Ashr->getOperand(0));
+      if (!(Shl && Shl->getOpcode() == Instruction::Shl))
+        continue;
+      Value *Intr = Shl->getOperand(0);
+      Value *ShlOp1 = Shl->getOperand(1);
+      C = dyn_cast<ConstantInt>(ShlOp1);
+      // Left shifted by 16.
+ if (!(C && C->getSExtValue() == 16)) + continue; + + // The first operand of Shl comes from an intrinsic. + if (IntrinsicInst *I = dyn_cast<IntrinsicInst>(Intr)) { + if (!intrinsicAlreadySextended(I->getIntrinsicID())) + continue; + // All is well. Replace all uses of AShr with I. + for (auto UI = Ashr->user_begin(), UE = Ashr->user_end(); + UI != UE; ++UI) { + const Use &TheUse = UI.getUse(); + if (Instruction *J = dyn_cast<Instruction>(TheUse.getUser())) { + J->replaceUsesOfWith(Ashr, I); + } + } + } + } + } + + return true; +} + + +FunctionPass *llvm::createHexagonOptimizeSZextends() { + return new HexagonOptimizeSZextends(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp index 93dcbe233b25..e68ff85b1da6 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp @@ -124,7 +124,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; PeepholeMap.clear(); PeepholeDoubleRegsMap.clear(); @@ -180,7 +180,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src1.getReg(); PeepholeDoubleRegsMap[DstReg] = - std::make_pair(*&SrcReg, 1/*Hexagon::subreg_hireg*/); + std::make_pair(*&SrcReg, Hexagon::subreg_hireg); } // Look for P=NOT(P). diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp index f6bb4a045438..61c0589fb5bf 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -66,6 +66,8 @@ HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF) const { switch (HST.getHexagonArchVersion()) { case HexagonSubtarget::V4: case HexagonSubtarget::V5: + case HexagonSubtarget::V55: + case HexagonSubtarget::V60: return CallerSavedRegsV4; } llvm_unreachable( @@ -84,6 +86,8 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) { case HexagonSubtarget::V4: case HexagonSubtarget::V5: + case HexagonSubtarget::V55: + case HexagonSubtarget::V60: return CalleeSavedRegsV3; } llvm_unreachable("Callee saved registers requested for unknown architecture " @@ -98,7 +102,7 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) Reserved.set(Hexagon::R29); Reserved.set(Hexagon::R30); Reserved.set(Hexagon::R31); - Reserved.set(Hexagon::D14); + Reserved.set(Hexagon::PC); Reserved.set(Hexagon::D15); Reserved.set(Hexagon::LC0); Reserved.set(Hexagon::LC1); @@ -116,62 +120,21 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; - MachineBasicBlock &MB = *MI.getParent(); MachineFunction &MF = *MB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); auto &HII = *HST.getInstrInfo(); auto &HFI = *HST.getFrameLowering(); + unsigned BP = 0; int FI = MI.getOperand(FIOp).getIndex(); - int Offset = MFI.getObjectOffset(FI) + MI.getOperand(FIOp+1).getImm(); - bool HasAlloca = MFI.hasVarSizedObjects(); - bool HasAlign = 
needsStackRealignment(MF); - - // XXX: Fixed objects cannot be accessed through SP if there are aligned - // objects in the local frame, or if there are dynamically allocated objects. - // In such cases, there has to be FP available. - if (!HFI.hasFP(MF)) { - assert(!HasAlloca && !HasAlign && "This function must have frame pointer"); - // We will not reserve space on the stack for the lr and fp registers. - Offset -= 8; - } - - unsigned SP = getStackRegister(), FP = getFrameRegister(); - unsigned AP = 0; - if (MachineInstr *AI = HFI.getAlignaInstr(MF)) - AP = AI->getOperand(0).getReg(); - unsigned FrameSize = MFI.getStackSize(); - - // Special handling of dbg_value instructions and INLINEASM. - if (MI.isDebugValue() || MI.isInlineAsm()) { - MI.getOperand(FIOp).ChangeToRegister(SP, false /*isDef*/); - MI.getOperand(FIOp+1).ChangeToImmediate(Offset+FrameSize); - return; - } - - bool UseFP = false, UseAP = false; // Default: use SP. - if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) { - UseFP = HasAlloca || HasAlign; - } else { - if (HasAlloca) { - if (HasAlign) - UseAP = true; - else - UseFP = true; - } - } + // Select the base pointer (BP) and calculate the actual offset from BP + // to the beginning of the object at index FI. + int Offset = HFI.getFrameIndexReference(MF, FI, BP); + // Add the offset from the instruction. + int RealOffset = Offset + MI.getOperand(FIOp+1).getImm(); unsigned Opc = MI.getOpcode(); - bool ValidSP = HII.isValidOffset(Opc, FrameSize+Offset); - bool ValidFP = HII.isValidOffset(Opc, Offset); - - // Calculate the actual offset in the instruction. - int64_t RealOffset = Offset; - if (!UseFP && !UseAP) - RealOffset = FrameSize+Offset; - switch (Opc) { case Hexagon::TFR_FIA: MI.setDesc(HII.get(Hexagon::A2_addi)); @@ -184,20 +147,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, break; } - unsigned BP = 0; - bool Valid = false; - if (UseFP) { - BP = FP; - Valid = ValidFP; - } else if (UseAP) { - BP = AP; - Valid = ValidFP; - } else { - BP = SP; - Valid = ValidSP; - } - - if (Valid) { + if (HII.isValidOffset(Opc, RealOffset)) { MI.getOperand(FIOp).ChangeToRegister(BP, false); MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset); return; @@ -223,8 +173,8 @@ unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const HexagonFrameLowering *TFI = getFrameLowering(MF); if (TFI->hasFP(MF)) - return Hexagon::R30; - return Hexagon::R29; + return getFrameRegister(); + return getStackRegister(); } @@ -238,17 +188,9 @@ unsigned HexagonRegisterInfo::getStackRegister() const { } -bool -HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { - const HexagonFrameLowering *TFI = getFrameLowering(MF); - return TFI->hasFP(MF); -} - - -bool -HexagonRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getMaxAlignment() > 8; +bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) + const { + return MF.getSubtarget<HexagonSubtarget>().getFrameLowering()->hasFP(MF); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h index 7edefee93993..db7e0f27815d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -63,8 +63,6 @@ public: return true; } - bool needsStackRealignment(const MachineFunction &MF) const override; - /// Returns true if the frame 
pointer is valid. bool useFPForScavengingIndex(const MachineFunction &MF) const override; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td index edf1c251ac77..81629dc6d47f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -53,6 +53,12 @@ let Namespace = "Hexagon" in { let Num = num; } + + // Rq - vector predicate registers + class Rq<bits<3> num, string n> : Register<n, []> { + let HWEncoding{2-0} = num; + } + // Rc - control registers class Rc<bits<5> num, string n, list<string> alt = [], list<Register> alias = []> : @@ -131,20 +137,21 @@ let Namespace = "Hexagon" in { def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>; def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>, DwarfRegNum<[71]>; - def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[72]>; - def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[73]>; + def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; // future use + def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>; + def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>; - def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[74]> { + def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> { let SubRegIndices = [subreg_overflow]; let SubRegs = [USR_OVF]; } - def PC : Rc<9, "pc">, DwarfRegNum<[75]>; - def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[76]>; - def GP : Rc<11, "gp">, DwarfRegNum<[77]>; - def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[78]>; - def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[79]>; - def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[80]>; - def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[81]>; + def PC : Rc<9, "pc">, DwarfRegNum<[76]>; + def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>; + def GP : Rc<11, "gp">, DwarfRegNum<[78]>; + def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>; + def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>; + def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>; + def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>; } // Control registers pairs. @@ -158,6 +165,36 @@ let Namespace = "Hexagon" in { def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>; } + foreach i = 0-31 in { + def V#i : Ri<i, "v"#i>, DwarfRegNum<[!add(i, 99)]>; + } + + // Aliases of the V* registers used to hold double vec values. + let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>; + def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>; + def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>; + def W3 : Rd< 6, "v7:6", [V6, V7]>, DwarfRegNum<[105]>; + def W4 : Rd< 8, "v9:8", [V8, V9]>, DwarfRegNum<[107]>; + def W5 : Rd<10, "v11:10", [V10, V11]>, DwarfRegNum<[109]>; + def W6 : Rd<12, "v13:12", [V12, V13]>, DwarfRegNum<[111]>; + def W7 : Rd<14, "v15:14", [V14, V15]>, DwarfRegNum<[113]>; + def W8 : Rd<16, "v17:16", [V16, V17]>, DwarfRegNum<[115]>; + def W9 : Rd<18, "v19:18", [V18, V19]>, DwarfRegNum<[117]>; + def W10 : Rd<20, "v21:20", [V20, V21]>, DwarfRegNum<[119]>; + def W11 : Rd<22, "v23:22", [V22, V23]>, DwarfRegNum<[121]>; + def W12 : Rd<24, "v25:24", [V24, V25]>, DwarfRegNum<[123]>; + def W13 : Rd<26, "v27:26", [V26, V27]>, DwarfRegNum<[125]>; + def W14 : Rd<28, "v29:28", [V28, V29]>, DwarfRegNum<[127]>; + def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>; + } + + // Vector Predicate registers. 
+ def Q0 : Rq<0, "q0">, DwarfRegNum<[131]>; + def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>; + def Q2 : Rq<2, "q2">, DwarfRegNum<[133]>; + def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>; + // Register classes. // // FIXME: the register order should be defined in terms of the preferred @@ -169,10 +206,34 @@ def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32, R10, R11, R29, R30, R31)> { } +// Registers are listed in reverse order for allocation preference reasons. +def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32, + (add R7, R6, R5, R4, R3, R2, R1, R0)> ; + def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64, (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>; +def VectorRegs : RegisterClass<"Hexagon", [v64i8, v32i16, v16i32, v8i64], 512, + (add (sequence "V%u", 0, 31))>; + +def VecDblRegs : RegisterClass<"Hexagon", + [v128i8, v64i16, v32i32, v16i64], 1024, + (add (sequence "W%u", 0, 15))>; + +def VectorRegs128B : RegisterClass<"Hexagon", + [v128i8, v64i16, v32i32, v16i64], 1024, + (add (sequence "V%u", 0, 31))>; + +def VecDblRegs128B : RegisterClass<"Hexagon", + [v256i8,v128i16,v64i32,v32i64], 2048, + (add (sequence "W%u", 0, 15))>; + +def VecPredRegs : RegisterClass<"Hexagon", [v512i1], 512, + (add (sequence "Q%u", 0, 3))>; + +def VecPredRegs128B : RegisterClass<"Hexagon", [v1024i1], 1024, + (add (sequence "Q%u", 0, 3))>; def PredRegs : RegisterClass<"Hexagon", [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32, diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp deleted file mode 100644 index 7069ad36e21a..000000000000 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ /dev/null @@ -1,91 +0,0 @@ -//===- HexagonRemoveExtendArgs.cpp - Remove unnecessary argument sign extends // -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Pass that removes sign extends for function parameters. 
These parameters
-// are already sign extended by the caller per Hexagon's ABI
-//
-//===----------------------------------------------------------------------===//
-
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
-
-using namespace llvm;
-
-namespace llvm {
-  FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
-  void initializeHexagonRemoveExtendArgsPass(PassRegistry&);
-}
-
-namespace {
-  struct HexagonRemoveExtendArgs : public FunctionPass {
-  public:
-    static char ID;
-    HexagonRemoveExtendArgs() : FunctionPass(ID) {
-      initializeHexagonRemoveExtendArgsPass(*PassRegistry::getPassRegistry());
-    }
-    bool runOnFunction(Function &F) override;
-
-    const char *getPassName() const override {
-      return "Remove sign extends";
-    }
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<MachineFunctionAnalysis>();
-      AU.addPreserved<MachineFunctionAnalysis>();
-      AU.addPreserved<StackProtector>();
-      FunctionPass::getAnalysisUsage(AU);
-    }
-  };
-}
-
-char HexagonRemoveExtendArgs::ID = 0;
-
-INITIALIZE_PASS(HexagonRemoveExtendArgs, "reargs",
-                "Remove Sign and Zero Extends for Args", false, false)
-
-bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
-  unsigned Idx = 1;
-  for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
-       ++AI, ++Idx) {
-    if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
-      Argument* Arg = AI;
-      if (!isa<PointerType>(Arg->getType())) {
-        for (auto UI = Arg->user_begin(); UI != Arg->user_end();) {
-          if (isa<SExtInst>(*UI)) {
-            Instruction* I = cast<Instruction>(*UI);
-            SExtInst* SI = new SExtInst(Arg, I->getType());
-            assert (EVT::getEVT(SI->getType()) ==
-                    (EVT::getEVT(I->getType())));
-            ++UI;
-            I->replaceAllUsesWith(SI);
-            Instruction* First = F.getEntryBlock().begin();
-            SI->insertBefore(First);
-            I->eraseFromParent();
-          } else {
-            ++UI;
-          }
-        }
-      }
-    }
-  }
-  return true;
-}
-
-
-
-FunctionPass*
-llvm::createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM) {
-  return new HexagonRemoveExtendArgs();
-}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
index 528cafc2bfea..6e4987b7e4e3 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -13,6 +13,12 @@ include "HexagonScheduleV4.td"
+// V55 Machine Info
+
+include "HexagonScheduleV55.td"
+
 //===----------------------------------------------------------------------===//
-// V4 Machine Info -
+// V60 Machine Info -
 //===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV60.td"
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
index a7d2d4724d0b..67af147b25b3 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -35,10 +35,11 @@ def SLOT_ENDLOOP: FuncUnit;
 // Itinerary classes.
 def PSEUDO : InstrItinClass;
-def PSEUDOM : InstrItinClass;
+def PSEUDOM : InstrItinClass; // ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
 def DUPLEX : InstrItinClass;
 def PREFIX : InstrItinClass;
+def COMPOUND_CJ_ARCHDEPSLOT : InstrItinClass;
 def COMPOUND : InstrItinClass;
 
 def ALU32_2op_tc_1_SLOT0123 : InstrItinClass;
@@ -58,6 +59,7 @@ def CR_tc_2early_SLOT3 : InstrItinClass;
 def CR_tc_3x_SLOT23 : InstrItinClass;
 def CR_tc_3x_SLOT3 : InstrItinClass;
 def J_tc_2early_SLOT23 : InstrItinClass;
+def J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT : InstrItinClass;
 def J_tc_2early_SLOT2 : InstrItinClass;
 def LD_tc_ld_SLOT01 : InstrItinClass;
 def LD_tc_ld_SLOT0 : InstrItinClass;
@@ -91,6 +93,7 @@ def V4LDST_tc_st_SLOT0 : InstrItinClass;
 def V4LDST_tc_st_SLOT01 : InstrItinClass;
 def J_tc_2early_SLOT0123 : InstrItinClass;
 def EXTENDER_tc_1_SLOT0123 : InstrItinClass;
+def S_3op_tc_3stall_SLOT23 : InstrItinClass;
 
 def HexagonItinerariesV4 :
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td
new file mode 100644
index 000000000000..d9ad25d4cd5a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td
@@ -0,0 +1,170 @@
+//=-HexagonScheduleV55.td - HexagonV55 Scheduling Definitions -*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four SLOTS (four parallel pipelines) in the Hexagon V55 machine.
+// This file describes that machine information.
+
+//
+// |===========|==================================================|
+// | PIPELINE  | Instruction Classes                              |
+// |===========|==================================================|
+// | SLOT0     | LD  ST  ALU32  MEMOP  NV  SYSTEM                 |
+// |-----------|--------------------------------------------------|
+// | SLOT1     | LD  ST  ALU32                                    |
+// |-----------|--------------------------------------------------|
+// | SLOT2     | XTYPE  ALU32  J  JR                              |
+// |-----------|--------------------------------------------------|
+// | SLOT3     | XTYPE  ALU32  J  CR                              |
+// |===========|==================================================|
+
+def CJ_tc_1_SLOT23 : InstrItinClass;
+def CJ_tc_2early_SLOT23 : InstrItinClass;
+def COPROC_VMEM_vtc_long_SLOT01 : InstrItinClass;
+def COPROC_VX_vtc_long_SLOT23 : InstrItinClass;
+def COPROC_VX_vtc_SLOT23 : InstrItinClass;
+def J_tc_3stall_SLOT2 : InstrItinClass;
+def MAPPING_tc_1_SLOT0123 : InstrItinClass;
+def M_tc_3stall_SLOT23 : InstrItinClass;
+def SUBINSN_tc_1_SLOT01 : InstrItinClass;
+def SUBINSN_tc_2early_SLOT0 : InstrItinClass;
+def SUBINSN_tc_2early_SLOT01 : InstrItinClass;
+def SUBINSN_tc_3stall_SLOT0 : InstrItinClass;
+def SUBINSN_tc_ld_SLOT0 : InstrItinClass;
+def SUBINSN_tc_ld_SLOT01 : InstrItinClass;
+def SUBINSN_tc_st_SLOT01 : InstrItinClass;
+
+def HexagonItinerariesV55 :
+      ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
+        // ALU32
+        InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+                      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+        InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+                      [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+        InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+                      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+        InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+                      [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+        InstrItinData<ALU32_3op_tc_2early_SLOT0123,
+                      [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+        InstrItinData<ALU32_ADDI_tc_1_SLOT0123 ,
+                      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+        // ALU64
+        InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+
InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + + // CR -> System + InstrItinData<CR_tc_2_SLOT3 , [InstrStage<2, [SLOT3]>]>, + InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<2, [SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<3, [SLOT3]>]>, + + // Jump (conditional/unconditional/return etc) + InstrItinData<CR_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<J_tc_2early_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>, + + // JR + InstrItinData<J_tc_2early_SLOT2 , [InstrStage<2, [SLOT2]>]>, + InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<3, [SLOT2]>]>, + + // Extender + InstrItinData<EXTENDER_tc_1_SLOT0123, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // Load + InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + + // M + InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>, + + // Store + InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // Subinsn + InstrItinData<SUBINSN_tc_2early_SLOT0, [InstrStage<2, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_1_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_2early_SLOT01, + [InstrStage<2, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + + // S + InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + + // New Value Compare Jump + InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + + // Mem ops + InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<2, [SLOT0, SLOT1]>]>, + InstrItinData<V2LDST_tc_st_SLOT01 , 
+                      [InstrStage<1, [SLOT0, SLOT1]>]>,
+        InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+        InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+        InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+        // Endloop
+        InstrItinData<J_tc_2early_SLOT0123, [InstrStage<2, [SLOT_ENDLOOP]>]>,
+
+        // Vector
+        InstrItinData<COPROC_VMEM_vtc_long_SLOT01,
+                      [InstrStage<3, [SLOT0, SLOT1]>]>,
+        InstrItinData<COPROC_VX_vtc_long_SLOT23 ,
+                      [InstrStage<3, [SLOT2, SLOT3]>]>,
+        InstrItinData<COPROC_VX_vtc_SLOT23 ,
+                      [InstrStage<3, [SLOT2, SLOT3]>]>,
+        InstrItinData<MAPPING_tc_1_SLOT0123 ,
+                      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+        // Misc
+        InstrItinData<COMPOUND_CJ_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>,
+        InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>,
+        InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>,
+        InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+        InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+        InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+                                InstrStage<1, [SLOT2, SLOT3]>]>
+
+  ]>;
+
+def HexagonModelV55 : SchedMachineModel {
+  // Max issue per cycle == bundle width.
+  let IssueWidth = 4;
+  let Itineraries = HexagonItinerariesV55;
+  let LoadLatency = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V55 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
new file mode 100644
index 000000000000..2ccff8242a47
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
@@ -0,0 +1,310 @@
+//=-HexagonScheduleV60.td - HexagonV60 Scheduling Definitions -*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// CVI pipes from the "Hexagon Multimedia Co-Processor Extensions Arch Spec".
+def CVI_ST : FuncUnit;
+def CVI_XLANE : FuncUnit;
+def CVI_SHIFT : FuncUnit;
+def CVI_MPY0 : FuncUnit;
+def CVI_MPY1 : FuncUnit;
+def CVI_LD : FuncUnit;
+
+// Combined functional units.
+def CVI_XLSHF : FuncUnit;
+def CVI_MPY01 : FuncUnit;
+def CVI_ALL : FuncUnit;
+
+// Combined functional unit data.
+def HexagonComboFuncsV60 :
+      ComboFuncUnits<[
+        ComboFuncData<CVI_XLSHF , [CVI_XLANE, CVI_SHIFT]>,
+        ComboFuncData<CVI_MPY01 , [CVI_MPY0, CVI_MPY1]>,
+        ComboFuncData<CVI_ALL , [CVI_ST, CVI_XLANE, CVI_SHIFT,
+                                 CVI_MPY0, CVI_MPY1, CVI_LD]>
+      ]>;
+
+// Note: When adding additional vector scheduling classes, add the
+// corresponding methods to the class HexagonInstrInfo.
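The combo units above let one reservation occupy several physical pipes at once: CVI_XLSHF takes both CVI_XLANE and CVI_SHIFT, and CVI_ALL takes every CVI pipe. As a rough value-level sketch of the resource model these itineraries encode (illustrative C++ only, with an invented bit numbering and a simplified greedy check, not LLVM's actual shuffler), a packet is legal when each instruction can claim pipes disjoint from those already taken:

    // Illustrative model only; names and numbering are assumptions, not LLVM code.
    #include <cstdint>
    #include <vector>

    enum : uint32_t {
      ST = 1u << 0, XLANE = 1u << 1, SHIFT = 1u << 2,
      MPY0 = 1u << 3, MPY1 = 1u << 4, LD = 1u << 5,
      // A combo unit reserves every member pipe at once.
      XLSHF = XLANE | SHIFT,
      MPY01 = MPY0 | MPY1,
      ALL = ST | XLANE | SHIFT | MPY0 | MPY1 | LD,
    };

    // Each instruction offers alternative reservations (e.g. a CVI_VA-style
    // class may use XLANE, SHIFT, MPY0 or MPY1). This greedy first-fit check
    // is a simplification; a real shuffler can also backtrack.
    bool packetFits(const std::vector<std::vector<uint32_t>> &Choices) {
      uint32_t Taken = 0;
      for (const std::vector<uint32_t> &Alts : Choices) {
        bool Placed = false;
        for (uint32_t R : Alts) {
          if ((R & Taken) == 0) { Taken |= R; Placed = true; break; }
        }
        if (!Placed)
          return false;   // every allowed pipe set is already in use
      }
      return true;
    }

Under this toy model, { {XLSHF}, {XLANE} } does not fit because the combo reservation already claims the XLANE pipe, while { {MPY01}, {XLANE, SHIFT} } does.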
+def CVI_VA : InstrItinClass; +def CVI_VA_DV : InstrItinClass; +def CVI_VX_LONG : InstrItinClass; +def CVI_VX_LATE : InstrItinClass; +def CVI_VX : InstrItinClass; +def CVI_VX_DV_LONG : InstrItinClass; +def CVI_VX_DV : InstrItinClass; +def CVI_VX_DV_SLOT2 : InstrItinClass; +def CVI_VP : InstrItinClass; +def CVI_VP_LONG : InstrItinClass; +def CVI_VP_VS_EARLY : InstrItinClass; +def CVI_VP_VS_LONG_EARLY : InstrItinClass; +def CVI_VP_VS_LONG : InstrItinClass; +def CVI_VP_VS : InstrItinClass; +def CVI_VP_DV : InstrItinClass; +def CVI_VS : InstrItinClass; +def CVI_VINLANESAT : InstrItinClass; +def CVI_VM_LD : InstrItinClass; +def CVI_VM_TMP_LD : InstrItinClass; +def CVI_VM_CUR_LD : InstrItinClass; +def CVI_VM_VP_LDU : InstrItinClass; +def CVI_VM_ST : InstrItinClass; +def CVI_VM_NEW_ST : InstrItinClass; +def CVI_VM_STU : InstrItinClass; +def CVI_HIST : InstrItinClass; +def CVI_VA_EXT : InstrItinClass; + +// There are four SLOTS (four parallel pipelines) in Hexagon V60 machine. +// This file describes that machine information. +// +// |===========|==================================================| +// | PIPELINE | Instruction Classes | +// |===========|==================================================| +// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM | +// |-----------|--------------------------------------------------| +// | SLOT1 | LD ST ALU32 | +// |-----------|--------------------------------------------------| +// | SLOT2 | XTYPE ALU32 J JR | +// |-----------|--------------------------------------------------| +// | SLOT3 | XTYPE ALU32 J CR | +// |===========|==================================================| +// +// +// In addition to using the above SLOTS, there are also six vector pipelines +// in the CVI co-processor in the Hexagon V60 machine. +// +// |=========| |=========| |=========| |=========| |=========| |=========| +// SLOT | CVI_LD | |CVI_MPY3 | |CVI_MPY2 | |CVI_SHIFT| |CVI_XLANE| | CVI_ST | +// ==== |=========| |=========| |=========| |=========| |=========| |=========| +// S0-3 | | | CVI_VA | | CVI_VA | | CVI_VA | | CVI_VA | | | +// S2-3 | | | CVI_VX | | CVI_VX | | | | | | | +// S0-3 | | | | | | | | | CVI_VP | | | +// S0-3 | | | | | | | CVI_VS | | | | | +// S0-1 |(CVI_LD) | | CVI_LD | | CVI_LD | | CVI_LD | | CVI_LD | | | +// S0-1 |(C*TMP_LD) | | | | | | | | | | +// S01 |(C*_LDU) | | | | | | | | C*_LDU | | | +// S0 | | | CVI_ST | | CVI_ST | | CVI_ST | | CVI_ST | |(CVI_ST) | +// S0 | | | | | | | | | | |(C*TMP_ST) +// S01 | | | | | | | | | VSTU | |(C*_STU) | +// |=========| |=========| |=========| |=========| |=========| |=========| +// |=====================| |=====================| +// | CVI_MPY2 & CVI_MPY3 | |CVI_XLANE & CVI_SHIFT| +// |=====================| |=====================| +// S0-3 | CVI_VA_DV | | CVI_VA_DV | +// S0-3 | | | CVI_VP_DV | +// S2-3 | CVI_VX_DV | | | +// |=====================| |=====================| +// |=====================================================================| +// S0-3 | CVI_HIST Histogram | +// S0123| CVI_VA_EXT Extract | +// |=====================================================================| + +def HexagonItinerariesV60 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL], [], [ + // ALU32 + InstrItinData<ALU32_2op_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_2op_tc_2early_SLOT0123, + [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_1_SLOT0123 , + [InstrStage<1, 
[SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_2_SLOT0123 , + [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_2early_SLOT0123, + [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_ADDI_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // ALU64 + InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + + // CR -> System + InstrItinData<CR_tc_2_SLOT3 , [InstrStage<2, [SLOT3]>]>, + InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<2, [SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<3, [SLOT3]>]>, + + // Jump (conditional/unconditional/return etc) + InstrItinData<CR_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<J_tc_2early_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>, + + // JR + InstrItinData<J_tc_2early_SLOT2 , [InstrStage<2, [SLOT2]>]>, + InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<3, [SLOT2]>]>, + + // Extender + InstrItinData<EXTENDER_tc_1_SLOT0123, [InstrStage<1, + [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // Load + InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>, + InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + + // M + InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>, + + // Store + InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // Subinsn + InstrItinData<SUBINSN_tc_2early_SLOT0, [InstrStage<2, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_1_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_2early_SLOT01, + [InstrStage<2, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + + // S + InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + // The S_2op_tc_3x_SLOT23 slots are 4 cycles on v60. 
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + + // New Value Compare Jump + InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>, + + // Mem ops + InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<2, [SLOT0, SLOT1]>]>, + InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + + // Endloop + InstrItinData<J_tc_2early_SLOT0123, [InstrStage<2, [SLOT_ENDLOOP]>]>, + + // Vector + InstrItinData<COPROC_VMEM_vtc_long_SLOT01, + [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<COPROC_VX_vtc_long_SLOT23 , + [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<COPROC_VX_vtc_SLOT23 , + [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<MAPPING_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // Duplex and Compound + InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>, + InstrItinData<COMPOUND_CJ_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>, + // Misc + InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<PSEUDOM , [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [SLOT2, SLOT3]>]>, + + // Latest CVI spec definitions. 
+ InstrItinData<CVI_VA,[InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLANE,CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VA_DV, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF, CVI_MPY01]>]>, + InstrItinData<CVI_VX_LONG, [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VX_LATE, [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VX,[InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VX_DV_LONG, + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData<CVI_VX_DV, + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData<CVI_VX_DV_SLOT2, + [InstrStage<1, [SLOT2], 0>, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData<CVI_VP, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData<CVI_VP_LONG, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData<CVI_VP_VS_EARLY, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VP_VS_LONG, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VP_VS, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VP_VS_LONG_EARLY, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VP_DV , [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VS, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>]>, + InstrItinData<CVI_VINLANESAT, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>]>, + InstrItinData<CVI_VM_LD , [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VM_TMP_LD,[InstrStage<1,[SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>]>, + InstrItinData<CVI_VM_CUR_LD,[InstrStage<1,[SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VM_VP_LDU,[InstrStage<1,[SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData<CVI_VM_ST , [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VM_NEW_ST,[InstrStage<1,[SLOT0], 0>, + InstrStage<1, [CVI_ST]>]>, + InstrItinData<CVI_VM_STU , [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData<CVI_HIST , [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_ALL]>]> + ]>; + +def HexagonModelV60 : SchedMachineModel { + // Max issue per cycle == bundle width. 
+ let IssueWidth = 4; + let Itineraries = HexagonItinerariesV60; + let LoadLatency = 1; +} + +//===----------------------------------------------------------------------===// +// Hexagon V60 Resource Definitions - +//===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp index 276cc69eed0f..239dbda8f27b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -12,12 +12,11 @@ //===----------------------------------------------------------------------===// #include "HexagonTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" using namespace llvm; #define DEBUG_TYPE "hexagon-selectiondag-info" -bool llvm::flag_aligned_memcpy; - SDValue HexagonSelectionDAGInfo:: EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, @@ -25,15 +24,40 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { - flag_aligned_memcpy = false; - if ((Align & 0x3) == 0) { - ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - if (ConstantSize) { - uint64_t SizeVal = ConstantSize->getZExtValue(); - if ((SizeVal > 32) && ((SizeVal % 8) == 0)) - flag_aligned_memcpy = true; - } - } - - return SDValue(); + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize) + return SDValue(); + + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (SizeVal < 32 || (SizeVal % 8) != 0) + return SDValue(); + + // Special case aligned memcpys with size >= 32 bytes and a multiple of 8. 
+ // + const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + Entry.Node = Src; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + + const char *SpecialMemcpyName = + "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*DAG.getContext()), + DAG.getTargetExternalSymbol( + SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0) + .setDiscardResult(); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index d3eb56f4ba0f..10fe606985dd 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -81,7 +81,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { // Loop over all of the basic blocks for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; // Traverse the basic block MachineBasicBlock::iterator MII = MBB->begin(); MachineBasicBlock::iterator MIE = MBB->end (); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp new file mode 100644 index 000000000000..d4e95b0d0210 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -0,0 +1,1209 @@ +//===--- HexagonSplitDouble.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hsdr" + +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" + +#include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <map> +#include <set> +#include <vector> + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonSplitDoubleRegs(); + void initializeHexagonSplitDoubleRegsPass(PassRegistry&); +} + +namespace { + static cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1), + cl::desc("Maximum number of split partitions")); + static cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true), + cl::desc("Do not split loads or stores")); + + class HexagonSplitDoubleRegs : public MachineFunctionPass { + public: + static char ID; + HexagonSplitDoubleRegs() : MachineFunctionPass(ID), TRI(nullptr), + TII(nullptr) { + initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon Split Double Registers"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + static const TargetRegisterClass *const DoubleRC; + + const HexagonRegisterInfo *TRI; + const HexagonInstrInfo *TII; + const MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + + typedef std::set<unsigned> USet; + typedef std::map<unsigned,USet> UUSetMap; + typedef std::pair<unsigned,unsigned> UUPair; + typedef std::map<unsigned,UUPair> UUPairMap; + typedef std::map<const MachineLoop*,USet> LoopRegMap; + + bool isInduction(unsigned Reg, LoopRegMap &IRM) const; + bool isVolatileInstr(const MachineInstr *MI) const; + bool isFixedInstr(const MachineInstr *MI) const; + void partitionRegisters(UUSetMap &P2Rs); + int32_t profit(const MachineInstr *MI) const; + bool isProfitable(const USet &Part, LoopRegMap &IRM) const; + + void collectIndRegsForLoop(const MachineLoop *L, USet &Rs); + void collectIndRegs(LoopRegMap &IRM); + + void createHalfInstr(unsigned Opc, MachineInstr *MI, + const UUPairMap &PairMap, unsigned SubR); + void splitMemRef(MachineInstr *MI, const UUPairMap &PairMap); + void splitImmediate(MachineInstr *MI, const UUPairMap &PairMap); + void splitCombine(MachineInstr *MI, const UUPairMap &PairMap); + void splitExt(MachineInstr *MI, const UUPairMap &PairMap); + void splitShift(MachineInstr *MI, const UUPairMap &PairMap); + void splitAslOr(MachineInstr *MI, const UUPairMap &PairMap); + bool splitInstr(MachineInstr *MI, const UUPairMap &PairMap); + void replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap); + void collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap); + bool splitPartition(const USet &Part); + + static int Counter; + static void dump_partition(raw_ostream&, const USet&, + const TargetRegisterInfo&); + }; + char HexagonSplitDoubleRegs::ID; + int HexagonSplitDoubleRegs::Counter = 0; + const TargetRegisterClass *const HexagonSplitDoubleRegs::DoubleRC + = &Hexagon::DoubleRegsRegClass; +} + 
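The split* methods declared above all rely on one value-level invariant: a DoubleRegs value is the pair of its 32-bit subregisters, and every handled operation decomposes into operations on the halves. A minimal C++ sketch of that invariant (illustrative names only, not pass code), mirroring the left-shift expansion that splitShift emits further below for 0 < #s < 32:

    #include <cassert>
    #include <cstdint>

    struct HalfPair { uint32_t Lo, Hi; };   // ~ subreg_loreg / subreg_hireg

    // Mirrors splitShift's expansion for a 64-bit left shift by 0 < S < 32:
    //   LoR  = shl R.lo, #s
    //   TmpR = extractu R.lo, #s, #32-s    (the top s bits of the low half)
    //   HiR  = or(TmpR, asl(R.hi, #s))     (S2_asl_i_r_or)
    HalfPair shl64(HalfPair R, unsigned S) {
      assert(S > 0 && S < 32);
      uint32_t Lo  = R.Lo << S;
      uint32_t Tmp = R.Lo >> (32 - S);
      uint32_t Hi  = Tmp | (R.Hi << S);
      return {Lo, Hi};
    }

For instance, shl64({0x80000000u, 0}, 1) returns {0, 1}, i.e. the 64-bit value 0x100000000, which is exactly what the paired 32-bit instruction sequence must produce.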
+INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double", + "Hexagon Split Double Registers", false, false) + + +static inline uint32_t getRegState(const MachineOperand &R) { + assert(R.isReg()); + return getDefRegState(R.isDef()) | + getImplRegState(R.isImplicit()) | + getKillRegState(R.isKill()) | + getDeadRegState(R.isDead()) | + getUndefRegState(R.isUndef()) | + getInternalReadRegState(R.isInternalRead()) | + (R.isDebug() ? RegState::Debug : 0); +} + + +void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os, + const USet &Part, const TargetRegisterInfo &TRI) { + dbgs() << '{'; + for (auto I : Part) + dbgs() << ' ' << PrintReg(I, &TRI); + dbgs() << " }"; +} + + +bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const { + for (auto I : IRM) { + const USet &Rs = I.second; + if (Rs.find(Reg) != Rs.end()) + return true; + } + return false; +} + + +bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const { + for (auto &I : MI->memoperands()) + if (I->isVolatile()) + return true; + return false; +} + + +bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const { + if (MI->mayLoad() || MI->mayStore()) + if (MemRefsFixed || isVolatileInstr(MI)) + return true; + if (MI->isDebugValue()) + return false; + + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + return true; + + case TargetOpcode::PHI: + case TargetOpcode::COPY: + break; + + case Hexagon::L2_loadrd_io: + // Not handling stack stores (only reg-based addresses). + if (MI->getOperand(1).isReg()) + break; + return true; + case Hexagon::S2_storerd_io: + // Not handling stack stores (only reg-based addresses). + if (MI->getOperand(0).isReg()) + break; + return true; + case Hexagon::L2_loadrd_pi: + case Hexagon::S2_storerd_pi: + + case Hexagon::A2_tfrpi: + case Hexagon::A2_combineii: + case Hexagon::A4_combineir: + case Hexagon::A4_combineii: + case Hexagon::A4_combineri: + case Hexagon::A2_combinew: + case Hexagon::CONST64_Int_Real: + + case Hexagon::A2_sxtw: + + case Hexagon::A2_andp: + case Hexagon::A2_orp: + case Hexagon::A2_xorp: + case Hexagon::S2_asl_i_p_or: + case Hexagon::S2_asl_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_lsr_i_p: + break; + } + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + return true; + } + return false; +} + + +void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { + typedef std::map<unsigned,unsigned> UUMap; + typedef std::vector<unsigned> UVect; + + unsigned NumRegs = MRI->getNumVirtRegs(); + BitVector DoubleRegs(NumRegs); + for (unsigned i = 0; i < NumRegs; ++i) { + unsigned R = TargetRegisterInfo::index2VirtReg(i); + if (MRI->getRegClass(R) == DoubleRC) + DoubleRegs.set(i); + } + + BitVector FixedRegs(NumRegs); + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + unsigned R = TargetRegisterInfo::index2VirtReg(x); + MachineInstr *DefI = MRI->getVRegDef(R); + // In some cases a register may exist, but never be defined or used. + // It should never appear anywhere, but mark it as "fixed", just to be + // safe. 
+ if (!DefI || isFixedInstr(DefI)) + FixedRegs.set(x); + } + + UUSetMap AssocMap; + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + if (FixedRegs[x]) + continue; + unsigned R = TargetRegisterInfo::index2VirtReg(x); + DEBUG(dbgs() << PrintReg(R, TRI) << " ~~"); + USet &Asc = AssocMap[R]; + for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end(); + U != Z; ++U) { + MachineOperand &Op = *U; + MachineInstr *UseI = Op.getParent(); + if (isFixedInstr(UseI)) + continue; + for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) { + MachineOperand &MO = UseI->getOperand(i); + // Skip non-registers or registers with subregisters. + if (&MO == &Op || !MO.isReg() || MO.getSubReg()) + continue; + unsigned T = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(T)) { + FixedRegs.set(x); + continue; + } + if (MRI->getRegClass(T) != DoubleRC) + continue; + unsigned u = TargetRegisterInfo::virtReg2Index(T); + if (FixedRegs[u]) + continue; + DEBUG(dbgs() << ' ' << PrintReg(T, TRI)); + Asc.insert(T); + // Make it symmetric. + AssocMap[T].insert(R); + } + } + DEBUG(dbgs() << '\n'); + } + + UUMap R2P; + unsigned NextP = 1; + USet Visited; + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + unsigned R = TargetRegisterInfo::index2VirtReg(x); + if (Visited.count(R)) + continue; + // Create a new partition for R. + unsigned ThisP = FixedRegs[x] ? 0 : NextP++; + UVect WorkQ; + WorkQ.push_back(R); + for (unsigned i = 0; i < WorkQ.size(); ++i) { + unsigned T = WorkQ[i]; + if (Visited.count(T)) + continue; + R2P[T] = ThisP; + Visited.insert(T); + // Add all registers associated with T. + USet &Asc = AssocMap[T]; + for (USet::iterator J = Asc.begin(), F = Asc.end(); J != F; ++J) + WorkQ.push_back(*J); + } + } + + for (auto I : R2P) + P2Rs[I.second].insert(I.first); +} + + +static inline int32_t profitImm(unsigned Lo, unsigned Hi) { + int32_t P = 0; + bool LoZ1 = false, HiZ1 = false; + if (Lo == 0 || Lo == 0xFFFFFFFF) + P += 10, LoZ1 = true; + if (Hi == 0 || Hi == 0xFFFFFFFF) + P += 10, HiZ1 = true; + if (!LoZ1 && !HiZ1 && Lo == Hi) + P += 3; + return P; +} + + +int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { + unsigned ImmX = 0; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::PHI: + for (const auto &Op : MI->operands()) + if (!Op.getSubReg()) + return 0; + return 10; + case TargetOpcode::COPY: + if (MI->getOperand(1).getSubReg() != 0) + return 10; + return 0; + + case Hexagon::L2_loadrd_io: + case Hexagon::S2_storerd_io: + return -1; + case Hexagon::L2_loadrd_pi: + case Hexagon::S2_storerd_pi: + return 2; + + case Hexagon::A2_tfrpi: + case Hexagon::CONST64_Int_Real: { + uint64_t D = MI->getOperand(1).getImm(); + unsigned Lo = D & 0xFFFFFFFFULL; + unsigned Hi = D >> 32; + return profitImm(Lo, Hi); + } + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + return profitImm(MI->getOperand(1).getImm(), + MI->getOperand(2).getImm()); + case Hexagon::A4_combineri: + ImmX++; + case Hexagon::A4_combineir: { + ImmX++; + int64_t V = MI->getOperand(ImmX).getImm(); + if (V == 0 || V == -1) + return 10; + // Fall through into A2_combinew. 
+ } + case Hexagon::A2_combinew: + return 2; + + case Hexagon::A2_sxtw: + return 3; + + case Hexagon::A2_andp: + case Hexagon::A2_orp: + case Hexagon::A2_xorp: + return 1; + + case Hexagon::S2_asl_i_p_or: { + unsigned S = MI->getOperand(3).getImm(); + if (S == 0 || S == 32) + return 10; + return -1; + } + case Hexagon::S2_asl_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_lsr_i_p: + unsigned S = MI->getOperand(2).getImm(); + if (S == 0 || S == 32) + return 10; + if (S == 16) + return 5; + if (S == 48) + return 7; + return -10; + } + + return 0; +} + + +bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM) + const { + unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0; + int32_t TotalP = 0; + + for (unsigned DR : Part) { + MachineInstr *DefI = MRI->getVRegDef(DR); + int32_t P = profit(DefI); + if (P == INT_MIN) + return false; + TotalP += P; + // Reduce the profitability of splitting induction registers. + if (isInduction(DR, IRM)) + TotalP -= 30; + + for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) { + MachineInstr *UseI = U->getParent(); + if (isFixedInstr(UseI)) { + FixedNum++; + // Calculate the cost of generating REG_SEQUENCE instructions. + for (auto &Op : UseI->operands()) { + if (Op.isReg() && Part.count(Op.getReg())) + if (Op.getSubReg()) + TotalP -= 2; + } + continue; + } + // If a register from this partition is used in a fixed instruction, + // and there is also a register in this partition that is used in + // a loop phi node, then decrease the splitting profit as this can + // confuse the modulo scheduler. + if (UseI->isPHI()) { + const MachineBasicBlock *PB = UseI->getParent(); + const MachineLoop *L = MLI->getLoopFor(PB); + if (L && L->getHeader() == PB) + LoopPhiNum++; + } + // Splittable instruction. + SplitNum++; + int32_t P = profit(UseI); + if (P == INT_MIN) + return false; + TotalP += P; + } + } + + if (FixedNum > 0 && LoopPhiNum > 0) + TotalP -= 20*LoopPhiNum; + + DEBUG(dbgs() << "Partition profit: " << TotalP << '\n'); + return TotalP > 0; +} + + +void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, + USet &Rs) { + const MachineBasicBlock *HB = L->getHeader(); + const MachineBasicBlock *LB = L->getLoopLatch(); + if (!HB || !LB) + return; + + // Examine the latch branch. Expect it to be a conditional branch to + // the header (either "br-cond header" or "br-cond exit; br header"). + MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock *TmpLB = const_cast<MachineBasicBlock*>(LB); + SmallVector<MachineOperand,2> Cond; + bool BadLB = TII->AnalyzeBranch(*TmpLB, TB, FB, Cond, false); + // Only analyzable conditional branches. HII::AnalyzeBranch will put + // the branch opcode as the first element of Cond, and the predicate + // operand as the second. + if (BadLB || Cond.size() != 2) + return; + // Only simple jump-conditional (with or without negation). + if (!TII->PredOpcodeHasJMP_c(Cond[0].getImm())) + return; + // Must go to the header. + if (TB != HB && FB != HB) + return; + assert(Cond[1].isReg() && "Unexpected Cond vector from AnalyzeBranch"); + // Expect a predicate register. + unsigned PR = Cond[1].getReg(); + assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass); + + // Get the registers on which the loop controlling compare instruction + // depends. 
+ unsigned CmpR1 = 0, CmpR2 = 0; + const MachineInstr *CmpI = MRI->getVRegDef(PR); + while (CmpI->getOpcode() == Hexagon::C2_not) + CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg()); + + int Mask = 0, Val = 0; + bool OkCI = TII->analyzeCompare(CmpI, CmpR1, CmpR2, Mask, Val); + if (!OkCI) + return; + // Eliminate non-double input registers. + if (CmpR1 && MRI->getRegClass(CmpR1) != DoubleRC) + CmpR1 = 0; + if (CmpR2 && MRI->getRegClass(CmpR2) != DoubleRC) + CmpR2 = 0; + if (!CmpR1 && !CmpR2) + return; + + // Now examine the top of the loop: the phi nodes that could poten- + // tially define loop induction registers. The registers defined by + // such a phi node would be used in a 64-bit add, which then would + // be used in the loop compare instruction. + + // Get the set of all double registers defined by phi nodes in the + // loop header. + typedef std::vector<unsigned> UVect; + UVect DP; + for (auto &MI : *HB) { + if (!MI.isPHI()) + break; + const MachineOperand &MD = MI.getOperand(0); + unsigned R = MD.getReg(); + if (MRI->getRegClass(R) == DoubleRC) + DP.push_back(R); + } + if (DP.empty()) + return; + + auto NoIndOp = [this, CmpR1, CmpR2] (unsigned R) -> bool { + for (auto I = MRI->use_nodbg_begin(R), E = MRI->use_nodbg_end(); + I != E; ++I) { + const MachineInstr *UseI = I->getParent(); + if (UseI->getOpcode() != Hexagon::A2_addp) + continue; + // Get the output from the add. If it is one of the inputs to the + // loop-controlling compare instruction, then R is likely an induc- + // tion register. + unsigned T = UseI->getOperand(0).getReg(); + if (T == CmpR1 || T == CmpR2) + return false; + } + return true; + }; + UVect::iterator End = std::remove_if(DP.begin(), DP.end(), NoIndOp); + Rs.insert(DP.begin(), End); + Rs.insert(CmpR1); + Rs.insert(CmpR2); + + DEBUG({ + dbgs() << "For loop at BB#" << HB->getNumber() << " ind regs: "; + dump_partition(dbgs(), Rs, *TRI); + dbgs() << '\n'; + }); +} + + +void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) { + typedef std::vector<MachineLoop*> LoopVector; + LoopVector WorkQ; + + for (auto I : *MLI) + WorkQ.push_back(I); + for (unsigned i = 0; i < WorkQ.size(); ++i) { + for (auto I : *WorkQ[i]) + WorkQ.push_back(I); + } + + USet Rs; + for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) { + MachineLoop *L = WorkQ[i]; + Rs.clear(); + collectIndRegsForLoop(L, Rs); + if (!Rs.empty()) + IRM.insert(std::make_pair(L, Rs)); + } +} + + +void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI, + const UUPairMap &PairMap, unsigned SubR) { + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineInstr *NewI = BuildMI(B, MI, DL, TII->get(Opc)); + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) { + NewI->addOperand(Op); + continue; + } + // For register operands, set the subregister. + unsigned R = Op.getReg(); + unsigned SR = Op.getSubReg(); + bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R); + bool isKill = Op.isKill(); + if (isVirtReg && MRI->getRegClass(R) == DoubleRC) { + isKill = false; + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) { + SR = SubR; + } else { + const UUPair &P = F->second; + R = (SubR == Hexagon::subreg_loreg) ? 
P.first : P.second; + SR = 0; + } + } + auto CO = MachineOperand::CreateReg(R, Op.isDef(), Op.isImplicit(), isKill, + Op.isDead(), Op.isUndef(), Op.isEarlyClobber(), SR, Op.isDebug(), + Op.isInternalRead()); + NewI->addOperand(CO); + } +} + + +void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI, + const UUPairMap &PairMap) { + bool Load = MI->mayLoad(); + unsigned OrigOpc = MI->getOpcode(); + bool PostInc = (OrigOpc == Hexagon::L2_loadrd_pi || + OrigOpc == Hexagon::S2_storerd_pi); + MachineInstr *LowI, *HighI; + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + // Index of the base-address-register operand. + unsigned AdrX = PostInc ? (Load ? 2 : 1) + : (Load ? 1 : 0); + MachineOperand &AdrOp = MI->getOperand(AdrX); + unsigned RSA = getRegState(AdrOp); + MachineOperand &ValOp = Load ? MI->getOperand(0) + : (PostInc ? MI->getOperand(3) + : MI->getOperand(2)); + UUPairMap::const_iterator F = PairMap.find(ValOp.getReg()); + assert(F != PairMap.end()); + + if (Load) { + const UUPair &P = F->second; + int64_t Off = PostInc ? 0 : MI->getOperand(2).getImm(); + LowI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.first) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off); + HighI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.second) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off+4); + } else { + const UUPair &P = F->second; + int64_t Off = PostInc ? 0 : MI->getOperand(1).getImm(); + LowI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io)) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off) + .addReg(P.first); + HighI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io)) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off+4) + .addReg(P.second); + } + + if (PostInc) { + // Create the increment of the address register. + int64_t Inc = Load ? MI->getOperand(3).getImm() + : MI->getOperand(2).getImm(); + MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0); + const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg()); + unsigned NewR = MRI->createVirtualRegister(RC); + assert(!UpdOp.getSubReg() && "Def operand with subreg"); + BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR) + .addReg(AdrOp.getReg(), RSA) + .addImm(Inc); + MRI->replaceRegWith(UpdOp.getReg(), NewR); + // The original instruction will be deleted later. + } + + // Generate a new pair of memory-operands. + MachineFunction &MF = *B.getParent(); + for (auto &MO : MI->memoperands()) { + const MachinePointerInfo &Ptr = MO->getPointerInfo(); + unsigned F = MO->getFlags(); + int A = MO->getAlignment(); + + auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A); + LowI->addMemOperand(MF, Tmp1); + auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4)); + HighI->addMemOperand(MF, Tmp2); + } +} + + +void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg() && Op1.isImm()); + uint64_t V = Op1.getImm(); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + + // The operand to A2_tfrsi can only have 32 significant bits. 
Immediate + // values in MachineOperand are stored as 64-bit integers, and so the + // value -1 may be represented either as 64-bit -1, or 4294967295. Both + // will have the 32 higher bits truncated in the end, but -1 will remain + // as -1, while the latter may appear to be a large unsigned value + // requiring a constant extender. The casting to int32_t will select the + // former representation. (The same reasoning applies to all 32-bit + // values.) + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) + .addImm(int32_t(V & 0xFFFFFFFFULL)); + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) + .addImm(int32_t(V >> 32)); +} + + +void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + assert(Op0.isReg()); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + + if (Op1.isImm()) { + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) + .addImm(Op1.getImm()); + } else if (Op1.isReg()) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second) + .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg()); + } else + llvm_unreachable("Unexpected operand"); + + if (Op2.isImm()) { + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) + .addImm(Op2.getImm()); + } else if (Op2.isReg()) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) + .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg()); + } else + llvm_unreachable("Unexpected operand"); +} + + +void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg() && Op1.isReg()); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned RS = getRegState(Op1); + + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) + .addReg(Op1.getReg(), RS & ~RegState::Kill, Op1.getSubReg()); + BuildMI(B, MI, DL, TII->get(Hexagon::S2_asr_i_r), P.second) + .addReg(Op1.getReg(), RS, Op1.getSubReg()) + .addImm(31); +} + + +void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + assert(Op0.isReg() && Op1.isReg() && Op2.isImm()); + int64_t Sh64 = Op2.getImm(); + assert(Sh64 >= 0 && Sh64 < 64); + unsigned S = Sh64; + + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned LoR = P.first; + unsigned HiR = P.second; + using namespace Hexagon; + + unsigned Opc = MI->getOpcode(); + bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p); + bool Left = !Right; + bool Signed = (Opc == S2_asr_i_p); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned RS = getRegState(Op1); + unsigned ShiftOpc = Left ? S2_asl_i_r + : (Signed ? S2_asr_i_r : S2_lsr_i_r); + unsigned LoSR = subreg_loreg; + unsigned HiSR = subreg_hireg; + + if (S == 0) { + // No shift, subregister copy. 
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), HiR) + .addReg(Op1.getReg(), RS, HiSR); + } else if (S < 32) { + const TargetRegisterClass *IntRC = &IntRegsRegClass; + unsigned TmpR = MRI->createVirtualRegister(IntRC); + // Expansion: + // Shift left: DR = shl R, #s + // LoR = shl R.lo, #s + // TmpR = extractu R.lo, #s, #32-s + // HiR = or (TmpR, asl(R.hi, #s)) + // Shift right: DR = shr R, #s + // HiR = shr R.hi, #s + // TmpR = shr R.lo, #s + // LoR = insert TmpR, R.hi, #s, #32-s + + // Shift left: + // LoR = shl R.lo, #s + // Shift right: + // TmpR = shr R.lo, #s + + // Make a special case for A2_aslh and A2_asrh (they are predicable as + // opposed to S2_asl_i_r/S2_asr_i_r). + if (S == 16 && Left) + BuildMI(B, MI, DL, TII->get(A2_aslh), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else if (S == 16 && Signed) + BuildMI(B, MI, DL, TII->get(A2_asrh), TmpR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else + BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? LoR : TmpR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR) + .addImm(S); + + if (Left) { + // TmpR = extractu R.lo, #s, #32-s + BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR) + .addImm(S) + .addImm(32-S); + // HiR = or (TmpR, asl(R.hi, #s)) + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(TmpR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(S); + } else { + // HiR = shr R.hi, #s + BuildMI(B, MI, DL, TII->get(ShiftOpc), HiR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR) + .addImm(S); + // LoR = insert TmpR, R.hi, #s, #32-s + BuildMI(B, MI, DL, TII->get(S2_insert), LoR) + .addReg(TmpR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(S) + .addImm(32-S); + } + } else if (S == 32) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), (Left ? HiR : LoR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR)); + if (!Signed) + BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR)) + .addImm(0); + else // Must be right shift. + BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(31); + } else if (S < 64) { + S -= 32; + if (S == 16 && Left) + BuildMI(B, MI, DL, TII->get(A2_aslh), HiR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else if (S == 16 && Signed) + BuildMI(B, MI, DL, TII->get(A2_asrh), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR); + else + BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? HiR : LoR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR)) + .addImm(S); + + if (Signed) + BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(31); + else + BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? 
LoR : HiR)) + .addImm(0); + } +} + + +void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); + assert(Op0.isReg() && Op1.isReg() && Op2.isReg() && Op3.isImm()); + int64_t Sh64 = Op3.getImm(); + assert(Sh64 >= 0 && Sh64 < 64); + unsigned S = Sh64; + + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned LoR = P.first; + unsigned HiR = P.second; + using namespace Hexagon; + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned RS1 = getRegState(Op1); + unsigned RS2 = getRegState(Op2); + const TargetRegisterClass *IntRC = &IntRegsRegClass; + + unsigned LoSR = subreg_loreg; + unsigned HiSR = subreg_hireg; + + // Op0 = S2_asl_i_p_or Op1, Op2, Op3 + // means: Op0 = or (Op1, asl(Op2, Op3)) + + // Expansion of + // DR = or (R1, asl(R2, #s)) + // + // LoR = or (R1.lo, asl(R2.lo, #s)) + // Tmp1 = extractu R2.lo, #s, #32-s + // Tmp2 = or R1.hi, Tmp1 + // HiR = or (Tmp2, asl(R2.hi, #s)) + + if (S == 0) { + // DR = or (R1, asl(R2, #0)) + // -> or (R1, R2) + // i.e. LoR = or R1.lo, R2.lo + // HiR = or R1.hi, R2.hi + BuildMI(B, MI, DL, TII->get(A2_or), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(A2_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, HiSR); + } else if (S < 32) { + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) + .addImm(S); + unsigned TmpR1 = MRI->createVirtualRegister(IntRC); + BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) + .addImm(S) + .addImm(32-S); + unsigned TmpR2 = MRI->createVirtualRegister(IntRC); + BuildMI(B, MI, DL, TII->get(A2_or), TmpR2) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(TmpR1); + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(TmpR2) + .addReg(Op2.getReg(), RS2, HiSR) + .addImm(S); + } else if (S == 32) { + // DR = or (R1, asl(R2, #32)) + // -> or R1, R2.lo + // LoR = R1.lo + // HiR = or R1.hi, R2.lo + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(A2_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, LoSR); + } else if (S < 64) { + // DR = or (R1, asl(R2, #s)) + // + // LoR = R1:lo + // HiR = or (R1:hi, asl(R2:lo, #s-32)) + S -= 32; + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, LoSR) + .addImm(S); + } +} + + +bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI, + const UUPairMap &PairMap) { + DEBUG(dbgs() << "Splitting: " << *MI); + bool Split = false; + unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + + switch (Opc) { + case TargetOpcode::PHI: + case TargetOpcode::COPY: { + unsigned DstR = MI->getOperand(0).getReg(); + if (MRI->getRegClass(DstR) == DoubleRC) { + createHalfInstr(Opc, MI, PairMap, subreg_loreg); + createHalfInstr(Opc, MI, PairMap, subreg_hireg); + Split = true; + } + break; + } + case A2_andp: + createHalfInstr(A2_and, MI, PairMap, 
subreg_loreg); + createHalfInstr(A2_and, MI, PairMap, subreg_hireg); + Split = true; + break; + case A2_orp: + createHalfInstr(A2_or, MI, PairMap, subreg_loreg); + createHalfInstr(A2_or, MI, PairMap, subreg_hireg); + Split = true; + break; + case A2_xorp: + createHalfInstr(A2_xor, MI, PairMap, subreg_loreg); + createHalfInstr(A2_xor, MI, PairMap, subreg_hireg); + Split = true; + break; + + case L2_loadrd_io: + case L2_loadrd_pi: + case S2_storerd_io: + case S2_storerd_pi: + splitMemRef(MI, PairMap); + Split = true; + break; + + case A2_tfrpi: + case CONST64_Int_Real: + splitImmediate(MI, PairMap); + Split = true; + break; + + case A2_combineii: + case A4_combineir: + case A4_combineii: + case A4_combineri: + case A2_combinew: + splitCombine(MI, PairMap); + Split = true; + break; + + case A2_sxtw: + splitExt(MI, PairMap); + Split = true; + break; + + case S2_asl_i_p: + case S2_asr_i_p: + case S2_lsr_i_p: + splitShift(MI, PairMap); + Split = true; + break; + + case S2_asl_i_p_or: + splitAslOr(MI, PairMap); + Split = true; + break; + + default: + llvm_unreachable("Instruction not splitable"); + return false; + } + + return Split; +} + + +void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI, + const UUPairMap &PairMap) { + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || !Op.getSubReg()) + continue; + unsigned R = Op.getReg(); + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) + continue; + const UUPair &P = F->second; + switch (Op.getSubReg()) { + case Hexagon::subreg_loreg: + Op.setReg(P.first); + break; + case Hexagon::subreg_hireg: + Op.setReg(P.second); + break; + } + Op.setSubReg(0); + } +} + + +void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg()) + continue; + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) + continue; + const UUPair &Pr = F->second; + unsigned NewDR = MRI->createVirtualRegister(DoubleRC); + BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR) + .addReg(Pr.first) + .addImm(Hexagon::subreg_loreg) + .addReg(Pr.second) + .addImm(Hexagon::subreg_hireg); + Op.setReg(NewDR); + } +} + + +bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { + const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; + typedef std::set<MachineInstr*> MISet; + bool Changed = false; + + DEBUG(dbgs() << "Splitting partition: "; dump_partition(dbgs(), Part, *TRI); + dbgs() << '\n'); + + UUPairMap PairMap; + + MISet SplitIns; + for (unsigned DR : Part) { + MachineInstr *DefI = MRI->getVRegDef(DR); + SplitIns.insert(DefI); + + // Collect all instructions, including fixed ones. We won't split them, + // but we need to visit them again to insert the REG_SEQUENCE instructions. 
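+    // (Fixed instructions keep their 64-bit register operands; for those,
+    //  collapseRegPairs below reassembles each pair with a REG_SEQUENCE.)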
+    for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+         U != W; ++U)
+      SplitIns.insert(U->getParent());
+
+    unsigned LoR = MRI->createVirtualRegister(IntRC);
+    unsigned HiR = MRI->createVirtualRegister(IntRC);
+    DEBUG(dbgs() << "Created mapping: " << PrintReg(DR, TRI) << " -> "
+                 << PrintReg(HiR, TRI) << ':' << PrintReg(LoR, TRI) << '\n');
+    PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR)));
+  }
+
+  MISet Erase;
+  for (auto MI : SplitIns) {
+    if (isFixedInstr(MI)) {
+      collapseRegPairs(MI, PairMap);
+    } else {
+      bool Done = splitInstr(MI, PairMap);
+      if (Done)
+        Erase.insert(MI);
+      Changed |= Done;
+    }
+  }
+
+  for (unsigned DR : Part) {
+    // Before erasing "double" instructions, revisit all uses of the double
+    // registers in this partition, and replace all subregister uses of them
+    // with the corresponding single registers.
+    MISet Uses;
+    for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+         U != W; ++U)
+      Uses.insert(U->getParent());
+    for (auto M : Uses)
+      replaceSubregUses(M, PairMap);
+  }
+
+  for (auto MI : Erase) {
+    MachineBasicBlock *B = MI->getParent();
+    B->erase(MI);
+  }
+
+  return Changed;
+}
+
+
+bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "Splitting double registers in function: "
+        << MF.getName() << '\n');
+
+  auto &ST = MF.getSubtarget<HexagonSubtarget>();
+  TRI = ST.getRegisterInfo();
+  TII = ST.getInstrInfo();
+  MRI = &MF.getRegInfo();
+  MLI = &getAnalysis<MachineLoopInfo>();
+
+  UUSetMap P2Rs;
+  LoopRegMap IRM;
+
+  collectIndRegs(IRM);
+  partitionRegisters(P2Rs);
+
+  DEBUG({
+    dbgs() << "Register partitioning: (partition #0 is fixed)\n";
+    for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
+      dbgs() << '#' << I->first << " -> ";
+      dump_partition(dbgs(), I->second, *TRI);
+      dbgs() << '\n';
+    }
+  });
+
+  bool Changed = false;
+  int Limit = MaxHSDR;
+
+  for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
+    if (I->first == 0)
+      continue;
+    if (Limit >= 0 && Counter >= Limit)
+      break;
+    USet &Part = I->second;
+    DEBUG(dbgs() << "Calculating profit for partition #" << I->first << '\n');
+    if (!isProfitable(Part, IRM))
+      continue;
+    Counter++;
+    Changed |= splitPartition(Part);
+  }
+
+  return Changed;
+}
+
+FunctionPass *llvm::createHexagonSplitDoubleRegs() {
+  return new HexagonSplitDoubleRegs();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
new file mode 100644
index 000000000000..b5339ff4c0dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -0,0 +1,616 @@
+//===--- HexagonStoreWidening.cpp------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Replace sequences of "narrow" stores to adjacent memory locations with
+// fewer "wide" stores that have the same effect.
+// For example, replace:
+//   S4_storeirb_io  %vreg100, 0, 0   ; store-immediate-byte
+//   S4_storeirb_io  %vreg100, 1, 0   ; store-immediate-byte
+// with
+//   S4_storeirh_io  %vreg100, 0, 0   ; store-immediate-halfword
+// The above is the general idea. The actual cases handled by the code
+// may be a bit more complex.
+// The purpose of this pass is to reduce the number of outstanding stores, +// or as one could say, "reduce store queue pressure". Also, wide stores +// mean fewer stores, and since there are only two memory instructions allowed +// per packet, it also means fewer packets, and ultimately fewer cycles. +//===---------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-widen-stores" + +#include "HexagonTargetMachine.h" + +#include "llvm/PassSupport.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#include <algorithm> + + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonStoreWidening(); + void initializeHexagonStoreWideningPass(PassRegistry&); +} + +namespace { + struct HexagonStoreWidening : public MachineFunctionPass { + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + AliasAnalysis *AA; + MachineFunction *MF; + + public: + static char ID; + HexagonStoreWidening() : MachineFunctionPass(ID) { + initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Hexagon Store Widening"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static bool handledStoreType(const MachineInstr *MI); + + private: + static const int MaxWideSize = 4; + + typedef std::vector<MachineInstr*> InstrGroup; + typedef std::vector<InstrGroup> InstrGroupList; + + bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO); + bool instrAliased(InstrGroup &Stores, const MachineInstr *MI); + void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin, + InstrGroup::iterator End, InstrGroup &Group); + void createStoreGroups(MachineBasicBlock &MBB, + InstrGroupList &StoreGroups); + bool processBasicBlock(MachineBasicBlock &MBB); + bool processStoreGroup(InstrGroup &Group); + bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End, + InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); + bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); + bool replaceStores(InstrGroup &OG, InstrGroup &NG); + bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2); + }; + +} // namespace + + +namespace { + +// Some local helper functions... 
+unsigned getBaseAddressRegister(const MachineInstr *MI) {
+  const MachineOperand &MO = MI->getOperand(0);
+  assert(MO.isReg() && "Expecting register operand");
+  return MO.getReg();
+}
+
+int64_t getStoreOffset(const MachineInstr *MI) {
+  unsigned OpC = MI->getOpcode();
+  assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode");
+
+  switch (OpC) {
+    case Hexagon::S4_storeirb_io:
+    case Hexagon::S4_storeirh_io:
+    case Hexagon::S4_storeiri_io: {
+      const MachineOperand &MO = MI->getOperand(1);
+      assert(MO.isImm() && "Expecting immediate offset");
+      return MO.getImm();
+    }
+  }
+  dbgs() << *MI;
+  llvm_unreachable("Store offset calculation missing for a handled opcode");
+  return 0;
+}
+
+const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
+  assert(!MI->memoperands_empty() && "Expecting memory operands");
+  return **MI->memoperands_begin();
+}
+
+} // namespace
+
+
+char HexagonStoreWidening::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
+                "Hexagon Store Widening", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
+                "Hexagon Store Widening", false, false)
+
+
+// Filtering function: any stores whose opcodes are not "approved" of by
+// this function will not be subjected to widening.
+inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) {
+  // For now, only handle stores of immediate values.
+  // Also, reject stores to stack slots.
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+    case Hexagon::S4_storeirb_io:
+    case Hexagon::S4_storeirh_io:
+    case Hexagon::S4_storeiri_io:
+      // Base address must be a register. (Implement FI later.)
+      return MI->getOperand(0).isReg();
+    default:
+      return false;
+  }
+}
+
+
+// Check if the machine memory operand MMO is aliased with any of the
+// stores in the store group Stores.
+bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
+      const MachineMemOperand &MMO) {
+  if (!MMO.getValue())
+    return true;
+
+  MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo());
+
+  for (auto SI : Stores) {
+    const MachineMemOperand &SMO = getStoreTarget(SI);
+    if (!SMO.getValue())
+      return true;
+
+    MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo());
+    if (AA->alias(L, SL))
+      return true;
+  }
+
+  return false;
+}
+
+
+// Check if the machine instruction MI accesses any storage aliased with
+// any store in the group Stores.
+bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
+      const MachineInstr *MI) {
+  for (auto &I : MI->memoperands())
+    if (instrAliased(Stores, *I))
+      return true;
+  return false;
+}
+
+
+// Inspect a machine basic block, and generate store groups out of stores
+// encountered in the block.
+//
+// A store group is a group of stores that use the same base register,
+// and which can be reordered within that group without altering the
+// semantics of the program. A single store group could be widened as
+// a whole, if there existed a single store instruction with the same
+// semantics as the entire group. In many cases, a single store group
+// may need more than one wide store.
+void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB,
+      InstrGroupList &StoreGroups) {
+  InstrGroup AllInsns;
+
+  // Copy all instruction pointers from the basic block to a temporary
+  // list. This will allow operating on the list, and modifying its
+  // elements without affecting the basic block.
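+  // (createStoreGroup nulls out the entries it claims for a group, which is
+  //  why the code below iterates over this copy rather than the block.)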
+ for (auto &I : MBB) + AllInsns.push_back(&I); + + // Traverse all instructions in the AllInsns list, and if we encounter + // a store, then try to create a store group starting at that instruction + // i.e. a sequence of independent stores that can be widened. + for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) { + MachineInstr *MI = *I; + // Skip null pointers (processed instructions). + if (!MI || !handledStoreType(MI)) + continue; + + // Found a store. Try to create a store group. + InstrGroup G; + createStoreGroup(MI, I+1, E, G); + if (G.size() > 1) + StoreGroups.push_back(G); + } +} + + +// Create a single store group. The stores need to be independent between +// themselves, and also there cannot be other instructions between them +// that could read or modify storage being stored into. +void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore, + InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) { + assert(handledStoreType(BaseStore) && "Unexpected instruction"); + unsigned BaseReg = getBaseAddressRegister(BaseStore); + InstrGroup Other; + + Group.push_back(BaseStore); + + for (auto I = Begin; I != End; ++I) { + MachineInstr *MI = *I; + if (!MI) + continue; + + if (handledStoreType(MI)) { + // If this store instruction is aliased with anything already in the + // group, terminate the group now. + if (instrAliased(Group, getStoreTarget(MI))) + return; + // If this store is aliased to any of the memory instructions we have + // seen so far (that are not a part of this group), terminate the group. + if (instrAliased(Other, getStoreTarget(MI))) + return; + + unsigned BR = getBaseAddressRegister(MI); + if (BR == BaseReg) { + Group.push_back(MI); + *I = 0; + continue; + } + } + + // Assume calls are aliased to everything. + if (MI->isCall() || MI->hasUnmodeledSideEffects()) + return; + + if (MI->mayLoad() || MI->mayStore()) { + if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI)) + return; + Other.push_back(MI); + } + } // for +} + + +// Check if store instructions S1 and S2 are adjacent. More precisely, +// S2 has to access memory immediately following that accessed by S1. +bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1, + const MachineInstr *S2) { + if (!handledStoreType(S1) || !handledStoreType(S2)) + return false; + + const MachineMemOperand &S1MO = getStoreTarget(S1); + + // Currently only handling immediate stores. + int Off1 = S1->getOperand(1).getImm(); + int Off2 = S2->getOperand(1).getImm(); + + return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2) + : int(Off1+S1MO.getSize()) == Off2; +} + + +/// Given a sequence of adjacent stores, and a maximum size of a single wide +/// store, pick a group of stores that can be replaced by a single store +/// of size not exceeding MaxSize. The selected sequence will be recorded +/// in OG ("old group" of instructions). +/// OG should be empty on entry, and should be left empty if the function +/// fails. 
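+/// For example, with MaxSize = 4: four adjacent byte stores at offsets
+/// 0..3 of a word-aligned base can become a single word store, while byte
+/// stores at offsets 2 and 3 can at best become a halfword store, since
+/// the offset of a 2^n-byte store must have its n lowest bits clear.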
+bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
+      InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize,
+      unsigned MaxSize) {
+  assert(Begin != End && "No instructions to analyze");
+  assert(OG.empty() && "Old group not empty on entry");
+
+  if (std::distance(Begin, End) <= 1)
+    return false;
+
+  MachineInstr *FirstMI = *Begin;
+  assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
+  const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI);
+  unsigned Alignment = FirstMMO.getAlignment();
+  unsigned SizeAccum = FirstMMO.getSize();
+  unsigned FirstOffset = getStoreOffset(FirstMI);
+
+  // The initial value of SizeAccum should always be a power of 2.
+  assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
+
+  // If the size of the first store equals or exceeds the limit, do nothing.
+  if (SizeAccum >= MaxSize)
+    return false;
+
+  // If the size of the first store is greater than or equal to the alignment
+  // of the stored-to address, then the store cannot be made any wider.
+  if (SizeAccum >= Alignment)
+    return false;
+
+  // The offset of a store will put restrictions on how wide the store can be.
+  // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0.
+  // If the first store already exhausts the offset limits, quit. Test this
+  // by checking if the next wider size would exceed the limit.
+  if ((2*SizeAccum-1) & FirstOffset)
+    return false;
+
+  OG.push_back(FirstMI);
+  MachineInstr *S1 = FirstMI, *S2 = *(Begin+1);
+  InstrGroup::iterator I = Begin+1;
+
+  // Pow2Num will be the largest number of elements in OG such that the sum
+  // of sizes of stores 0...Pow2Num-1 will be a power of 2.
+  unsigned Pow2Num = 1;
+  unsigned Pow2Size = SizeAccum;
+
+  // Be greedy: keep accumulating stores as long as they are to adjacent
+  // memory locations, and as long as the total number of bytes stored
+  // does not exceed the limit (MaxSize).
+  // Keep track of when the total size covered is a power of 2, since
+  // this is a size a single store can cover.
+  while (I != End) {
+    S2 = *I;
+    // Stores are sorted, so if S1 and S2 are not adjacent, there won't be
+    // any other store to fill the "hole".
+    if (!storesAreAdjacent(S1, S2))
+      break;
+
+    unsigned S2Size = getStoreTarget(S2).getSize();
+    if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
+      break;
+
+    OG.push_back(S2);
+    SizeAccum += S2Size;
+    if (isPowerOf2_32(SizeAccum)) {
+      Pow2Num = OG.size();
+      Pow2Size = SizeAccum;
+    }
+    if ((2*Pow2Size-1) & FirstOffset)
+      break;
+
+    S1 = S2;
+    ++I;
+  }
+
+  // The stores don't add up to anything that can be widened. Clean up.
+  if (Pow2Num <= 1) {
+    OG.clear();
+    return false;
+  }
+
+  // Only leave the stores being widened.
+  OG.resize(Pow2Num);
+  TotalSize = Pow2Size;
+  return true;
+}
+
+
+/// Given an "old group" OG of stores, create a "new group" NG of instructions
+/// to replace them. Ideally, NG would only have a single instruction in it,
+/// but that may only be possible for store-immediate.
+bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
+      unsigned TotalSize) {
+  // XXX Current limitations:
+  // - only expect stores of immediate values in OG,
+  // - only handle a TotalSize of up to 4.
+
+  if (TotalSize > 4)
+    return false;
+
+  unsigned Acc = 0;  // Value accumulator.
+  unsigned Shift = 0;
+
+  for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) {
+    MachineInstr *MI = *I;
+    const MachineMemOperand &MMO = getStoreTarget(MI);
+    MachineOperand &SO = MI->getOperand(2);  // Source.
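+    // (E.g. a byte store of 0x12 at offset 0 followed by a byte store of
+    //  0x34 at offset 1 accumulates Acc = 0x12 | (0x34 << 8) = 0x3412,
+    //  which is emitted below as a single little-endian halfword store.)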
+ assert(SO.isImm() && "Expecting an immediate operand"); + + unsigned NBits = MMO.getSize()*8; + unsigned Mask = (0xFFFFFFFFU >> (32-NBits)); + unsigned Val = (SO.getImm() & Mask) << Shift; + Acc |= Val; + Shift += NBits; + } + + + MachineInstr *FirstSt = OG.front(); + DebugLoc DL = OG.back()->getDebugLoc(); + const MachineMemOperand &OldM = getStoreTarget(FirstSt); + MachineMemOperand *NewM = + MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), + TotalSize, OldM.getAlignment(), + OldM.getAAInfo()); + + if (Acc < 0x10000) { + // Create mem[hw] = #Acc + unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io : + (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0; + assert(WOpc && "Unexpected size"); + + int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc); + const MCInstrDesc &StD = TII->get(WOpc); + MachineOperand &MR = FirstSt->getOperand(0); + int64_t Off = FirstSt->getOperand(1).getImm(); + MachineInstr *StI = BuildMI(*MF, DL, StD) + .addReg(MR.getReg(), getKillRegState(MR.isKill())) + .addImm(Off) + .addImm(Val); + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + } else { + // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg + const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); + unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); + MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) + .addImm(int(Acc)); + NG.push_back(TfrI); + + unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io : + (TotalSize == 4) ? Hexagon::S2_storeri_io : 0; + assert(WOpc && "Unexpected size"); + + const MCInstrDesc &StD = TII->get(WOpc); + MachineOperand &MR = FirstSt->getOperand(0); + int64_t Off = FirstSt->getOperand(1).getImm(); + MachineInstr *StI = BuildMI(*MF, DL, StD) + .addReg(MR.getReg(), getKillRegState(MR.isKill())) + .addImm(Off) + .addReg(VReg, RegState::Kill); + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + } + + return true; +} + + +// Replace instructions from the old group OG with instructions from the +// new group NG. Conceptually, remove all instructions in OG, and then +// insert all instructions in NG, starting at where the first instruction +// from OG was (in the order in which they appeared in the basic block). +// (The ordering in OG does not have to match the order in the basic block.) +bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) { + DEBUG({ + dbgs() << "Replacing:\n"; + for (auto I : OG) + dbgs() << " " << *I; + dbgs() << "with\n"; + for (auto I : NG) + dbgs() << " " << *I; + }); + + MachineBasicBlock *MBB = OG.back()->getParent(); + MachineBasicBlock::iterator InsertAt = MBB->end(); + + // Need to establish the insertion point. The best one is right before + // the first store in the OG, but in the order in which the stores occur + // in the program list. Since the ordering in OG does not correspond + // to the order in the program list, we need to do some work to find + // the insertion point. + + // Create a set of all instructions in OG (for quick lookup). + SmallPtrSet<MachineInstr*, 4> InstrSet; + for (auto I : OG) + InstrSet.insert(I); + + // Traverse the block, until we hit an instruction from OG. + for (auto &I : *MBB) { + if (InstrSet.count(&I)) { + InsertAt = I; + break; + } + } + + assert((InsertAt != MBB->end()) && "Cannot locate any store from the group"); + + bool AtBBStart = false; + + // InsertAt points at the first instruction that will be removed. 
We need + // to move it out of the way, so it remains valid after removing all the + // old stores, and so we are able to recover it back to the proper insertion + // position. + if (InsertAt != MBB->begin()) + --InsertAt; + else + AtBBStart = true; + + for (auto I : OG) + I->eraseFromParent(); + + if (!AtBBStart) + ++InsertAt; + else + InsertAt = MBB->begin(); + + for (auto I : NG) + MBB->insert(InsertAt, I); + + return true; +} + + +// Break up the group into smaller groups, each of which can be replaced by +// a single wide store. Widen each such smaller group and replace the old +// instructions with the widened ones. +bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) { + bool Changed = false; + InstrGroup::iterator I = Group.begin(), E = Group.end(); + InstrGroup OG, NG; // Old and new groups. + unsigned CollectedSize; + + while (I != E) { + OG.clear(); + NG.clear(); + + bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) && + createWideStores(OG, NG, CollectedSize) && + replaceStores(OG, NG); + if (!Succ) + continue; + + assert(OG.size() > 1 && "Created invalid group"); + assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements"); + I += OG.size()-1; + + Changed = true; + } + + return Changed; +} + + +// Process a single basic block: create the store groups, and replace them +// with the widened stores, if possible. Processing of each basic block +// is independent from processing of any other basic block. This transfor- +// mation could be stopped after having processed any basic block without +// any ill effects (other than not having performed widening in the unpro- +// cessed blocks). Also, the basic blocks can be processed in any order. +bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { + InstrGroupList SGs; + bool Changed = false; + + createStoreGroups(MBB, SGs); + + auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool { + return getStoreOffset(A) < getStoreOffset(B); + }; + for (auto &G : SGs) { + assert(G.size() > 1 && "Store group with fewer than 2 elements"); + std::sort(G.begin(), G.end(), Less); + + Changed |= processStoreGroup(G); + } + + return Changed; +} + + +bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { + MF = &MFn; + auto &ST = MFn.getSubtarget<HexagonSubtarget>(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MRI = &MFn.getRegInfo(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + + bool Changed = false; + + for (auto &B : MFn) + Changed |= processBasicBlock(B); + + return Changed; +} + + +FunctionPass *llvm::createHexagonStoreWidening() { + return new HexagonStoreWidening(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index cd482b3e3af1..aa0efd4f65e0 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -16,6 +16,8 @@ #include "HexagonRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include <map> + using namespace llvm; #define DEBUG_TYPE "hexagon-subtarget" @@ -24,49 +26,65 @@ using namespace llvm; #define GET_SUBTARGETINFO_TARGET_DESC #include "HexagonGenSubtargetInfo.inc" -static cl::opt<bool> -EnableV3("enable-hexagon-v3", cl::Hidden, - cl::desc("Enable Hexagon V3 instructions.")); - -static cl::opt<bool> -EnableMemOps( - "enable-hexagon-memops", - cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true), - cl::desc( - "Generate V4 
MEMOP in code generation for Hexagon target")); - -static cl::opt<bool> -DisableMemOps( - "disable-hexagon-memops", - cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false), - cl::desc( - "Do not generate V4 MEMOP in code generation for Hexagon target")); - -static cl::opt<bool> -EnableIEEERndNear( - "enable-hexagon-ieee-rnd-near", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Generate non-chopped conversion from fp to int.")); +static cl::opt<bool> EnableMemOps("enable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true), + cl::desc("Generate V4 MEMOP in code generation for Hexagon target")); + +static cl::opt<bool> DisableMemOps("disable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false), + cl::desc("Do not generate V4 MEMOP in code generation for Hexagon target")); + +static cl::opt<bool> EnableIEEERndNear("enable-hexagon-ieee-rnd-near", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Generate non-chopped conversion from fp to int.")); + +static cl::opt<bool> EnableBSBSched("enable-bsb-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +static cl::opt<bool> EnableHexagonHVXDouble("enable-hexagon-hvx-double", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon Double Vector eXtensions")); + +static cl::opt<bool> EnableHexagonHVX("enable-hexagon-hvx", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon Vector eXtensions")); static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon MI Scheduling")); + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon MI Scheduling")); + +void HexagonSubtarget::initializeEnvironment() { + UseMemOps = false; + ModeIEEERndNear = false; + UseBSBScheduling = false; +} HexagonSubtarget & HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - // If the programmer has not specified a Hexagon version, default to -mv4. - if (CPUString.empty()) - CPUString = "hexagonv4"; - - if (CPUString == "hexagonv4") { - HexagonArchVersion = V4; - } else if (CPUString == "hexagonv5") { - HexagonArchVersion = V5; - } else { + CPUString = HEXAGON_MC::selectHexagonCPU(getTargetTriple(), CPU); + + static std::map<StringRef, HexagonArchEnum> CpuTable { + { "hexagonv4", V4 }, + { "hexagonv5", V5 }, + { "hexagonv55", V55 }, + { "hexagonv60", V60 }, + }; + + auto foundIt = CpuTable.find(CPUString); + if (foundIt != CpuTable.end()) + HexagonArchVersion = foundIt->second; + else llvm_unreachable("Unrecognized Hexagon processor version"); - } + UseHVXOps = false; + UseHVXDblOps = false; ParseSubtargetFeatures(CPUString, FS); + + if (EnableHexagonHVX.getPosition()) + UseHVXOps = EnableHexagonHVX; + if (EnableHexagonHVXDouble.getPosition()) + UseHVXDblOps = EnableHexagonHVXDouble; + return *this; } @@ -76,6 +94,8 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), FrameLowering() { + initializeEnvironment(); + // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); @@ -91,6 +111,8 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, ModeIEEERndNear = true; else ModeIEEERndNear = false; + + UseBSBScheduling = hasV60TOps() && EnableBSBSched; } // Pin the vtable to this file. 
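The CPU-selection change above replaces the old if/else chain with a table lookup, which makes adding the V55 and V60 entries a one-line change and funnels every unknown CPU string into a single llvm_unreachable. A minimal, self-contained sketch of the same pattern, with illustrative names only (not the committed interface):

#include <cstdlib>
#include <iostream>
#include <map>
#include <string>

enum HexagonArch { V4, V5, V55, V60 };  // ordered: later archs compare greater

static HexagonArch selectArch(const std::string &CPU) {
  // One lookup table instead of a chain of string comparisons.
  static const std::map<std::string, HexagonArch> CpuTable{
      {"hexagonv4", V4}, {"hexagonv5", V5},
      {"hexagonv55", V55}, {"hexagonv60", V60}};
  auto It = CpuTable.find(CPU);
  if (It == CpuTable.end()) {
    std::cerr << "Unrecognized Hexagon processor version\n";
    std::abort();  // stand-in for llvm_unreachable
  }
  return It->second;
}

int main() {
  HexagonArch A = selectArch("hexagonv55");
  // Feature predicates reduce to ordered comparisons on the enum.
  std::cout << (A >= V5) << ' ' << (A >= V60) << '\n';  // prints "1 0"
  return 0;
}

Keeping the enum ordered is what lets the hasV5TOps()/hasV55TOps()/hasV60TOps() accessors in HexagonSubtarget.h stay simple >= and == comparisons.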
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 34cdad786f82..c7ae139c4346 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -34,15 +34,19 @@ namespace llvm { class HexagonSubtarget : public HexagonGenSubtargetInfo { virtual void anchor(); - bool UseMemOps; + bool UseMemOps, UseHVXOps, UseHVXDblOps; bool ModeIEEERndNear; public: enum HexagonArchEnum { - V4, V5 + V4, V5, V55, V60 }; HexagonArchEnum HexagonArchVersion; + /// True if the target should use Back-Skip-Back scheduling. This is the + /// default for V60. + bool UseBSBScheduling; + private: std::string CPUString; HexagonInstrInfo InstrInfo; @@ -50,6 +54,7 @@ private: HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; InstrItineraryData InstrItins; + void initializeEnvironment(); public: HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, @@ -84,7 +89,16 @@ public: bool useMemOps() const { return UseMemOps; } bool hasV5TOps() const { return getHexagonArchVersion() >= V5; } bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; } + bool hasV55TOps() const { return getHexagonArchVersion() >= V55; } + bool hasV55TOpsOnly() const { return getHexagonArchVersion() == V55; } + bool hasV60TOps() const { return getHexagonArchVersion() >= V60; } + bool hasV60TOpsOnly() const { return getHexagonArchVersion() == V60; } bool modeIEEERndNear() const { return ModeIEEERndNear; } + bool useHVXOps() const { return UseHVXOps; } + bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; } + bool useHVXSglOps() const { return UseHVXOps && !UseHVXDblOps; } + + bool useBSBScheduling() const { return UseBSBScheduling; } bool enableMachineScheduler() const override; // Always use the TargetLowering default scheduler. 
// FIXME: This will use the vliw scheduler which is probably just hurting @@ -98,7 +112,7 @@ public: return Hexagon_SMALL_DATA_THRESHOLD; } const HexagonArchEnum &getHexagonArchVersion() const { - return HexagonArchVersion; + return HexagonArchVersion; } }; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index b50442969a29..9dccd696c989 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -16,12 +16,12 @@ #include "HexagonISelLowering.h" #include "HexagonMachineScheduler.h" #include "HexagonTargetObjectFile.h" +#include "HexagonTargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -33,10 +33,16 @@ static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon CFG Optimization")); +static cl::opt<bool> DisableStoreWidening("disable-store-widen", + cl::Hidden, cl::init(false), cl::desc("Disable store widening")); + static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Early expansion of MUX")); +static cl::opt<bool> EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden, + cl::ZeroOrMore, cl::desc("Enable early if-conversion")); + static cl::opt<bool> EnableGenInsert("hexagon-insert", cl::init(true), cl::Hidden, cl::desc("Generate \"insert\" instructions")); @@ -46,10 +52,22 @@ static cl::opt<bool> EnableCommGEP("hexagon-commgep", cl::init(true), static cl::opt<bool> EnableGenExtract("hexagon-extract", cl::init(true), cl::Hidden, cl::desc("Generate \"extract\" instructions")); +static cl::opt<bool> EnableGenMux("hexagon-mux", cl::init(true), cl::Hidden, + cl::desc("Enable converting conditional transfers into MUX instructions")); + static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true), cl::Hidden, cl::desc("Enable conversion of arithmetic operations to " "predicate instructions")); +static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden, + cl::desc("Disable splitting double registers")); + +static cl::opt<bool> EnableBitSimplify("hexagon-bit", cl::init(true), + cl::Hidden, cl::desc("Bit simplification")); + +static cl::opt<bool> EnableLoopResched("hexagon-loop-resched", cl::init(true), + cl::Hidden, cl::desc("Loop rescheduling")); + /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the /// library. 
In particular, it seems that it is not possible to get @@ -72,23 +90,30 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", createVLIWMachineSched); namespace llvm { + FunctionPass *createHexagonBitSimplify(); + FunctionPass *createHexagonCallFrameInformation(); FunctionPass *createHexagonCFGOptimizer(); FunctionPass *createHexagonCommonGEP(); FunctionPass *createHexagonCopyToCombine(); + FunctionPass *createHexagonEarlyIfConversion(); FunctionPass *createHexagonExpandCondsets(); FunctionPass *createHexagonExpandPredSpillCode(); FunctionPass *createHexagonFixupHwLoops(); FunctionPass *createHexagonGenExtract(); FunctionPass *createHexagonGenInsert(); + FunctionPass *createHexagonGenMux(); FunctionPass *createHexagonGenPredicate(); FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel); + FunctionPass *createHexagonLoopRescheduling(); FunctionPass *createHexagonNewValueJump(); + FunctionPass *createHexagonOptimizeSZextends(); FunctionPass *createHexagonPacketizer(); FunctionPass *createHexagonPeephole(); - FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); FunctionPass *createHexagonSplitConst32AndConst64(); + FunctionPass *createHexagonSplitDoubleRegs(); + FunctionPass *createHexagonStoreWidening(); } // end namespace llvm; /// HexagonTargetMachine ctor - Create an ILP32 architecture model. @@ -101,13 +126,46 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i64:64-a:0-n32", TT, CPU, FS, - Options, RM, CM, OL), - TLOF(make_unique<HexagonTargetObjectFile>()), - Subtarget(TT, CPU, FS, *this) { - initAsmInfo(); + : LLVMTargetMachine(T, "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-" + "i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-" + "n16:32", TT, CPU, FS, Options, RM, CM, OL), + TLOF(make_unique<HexagonTargetObjectFile>()) { + initAsmInfo(); +} + +const HexagonSubtarget * +HexagonTargetMachine::getSubtargetImpl(const Function &F) const { + AttributeSet FnAttrs = F.getAttributes(); + Attribute CPUAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu"); + Attribute FSAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); + + std::string CPU = !CPUAttr.hasAttribute(Attribute::None) + ? CPUAttr.getValueAsString().str() + : TargetCPU; + std::string FS = !FSAttr.hasAttribute(Attribute::None) + ? FSAttr.getValueAsString().str() + : TargetFS; + + auto &I = SubtargetMap[CPU + FS]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. 
+ resetTargetOptions(F); + I = llvm::make_unique<HexagonSubtarget>(TargetTriple, CPU, FS, *this); + } + return I.get(); +} + +TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(HexagonTTIImpl(this, F)); + }); } + HexagonTargetMachine::~HexagonTargetMachine() {} namespace { @@ -166,7 +224,7 @@ bool HexagonPassConfig::addInstSelector() { bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) - addPass(createHexagonRemoveExtendArgs(TM)); + addPass(createHexagonOptimizeSZextends()); addPass(createHexagonISelDag(TM, getOptLevel())); @@ -174,19 +232,33 @@ bool HexagonPassConfig::addInstSelector() { // Create logical operations on predicate registers. if (EnableGenPred) addPass(createHexagonGenPredicate(), false); + // Rotate loops to expose bit-simplification opportunities. + if (EnableLoopResched) + addPass(createHexagonLoopRescheduling(), false); + // Split double registers. + if (!DisableHSDR) + addPass(createHexagonSplitDoubleRegs()); + // Bit simplification. + if (EnableBitSimplify) + addPass(createHexagonBitSimplify(), false); addPass(createHexagonPeephole()); printAndVerify("After hexagon peephole pass"); if (EnableGenInsert) addPass(createHexagonGenInsert(), false); + if (EnableEarlyIf) + addPass(createHexagonEarlyIfConversion(), false); } return false; } void HexagonPassConfig::addPreRegAlloc() { - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { + if (!DisableStoreWidening) + addPass(createHexagonStoreWidening(), false); if (!DisableHardwareLoops) addPass(createHexagonHardwareLoops(), false); + } } void HexagonPassConfig::addPostRegAlloc() { @@ -215,6 +287,13 @@ void HexagonPassConfig::addPreEmitPass() { if (!NoOpt) { if (!DisableHardwareLoops) addPass(createHexagonFixupHwLoops(), false); + // Generate MUX from pairs of conditional transfers. + if (EnableGenMux) + addPass(createHexagonGenMux(), false); + addPass(createHexagonPacketizer(), false); } + + // Add CFI instructions if necessary. 
+ addPass(createHexagonCallFrameInformation(), false); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h index 115eadb98c33..968814b3ea32 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -16,6 +16,7 @@ #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" +#include "HexagonTargetObjectFile.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -24,7 +25,7 @@ class Module; class HexagonTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; - HexagonSubtarget Subtarget; + mutable StringMap<std::unique_ptr<HexagonSubtarget>> SubtargetMap; public: HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU, @@ -32,20 +33,18 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); ~HexagonTargetMachine() override; - const HexagonSubtarget *getSubtargetImpl(const Function &) const override { - return &Subtarget; - } + const HexagonSubtarget *getSubtargetImpl(const Function &F) const override; + static unsigned getModuleMatchQuality(const Module &M); TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetIRAnalysis getTargetIRAnalysis() override; - TargetLoweringObjectFile *getObjFileLowering() const override { - return TLOF.get(); + HexagonTargetObjectFile *getObjFileLowering() const override { + return static_cast<HexagonTargetObjectFile*>(TLOF.get()); } }; -extern bool flag_aligned_memcpy; - } // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index 4ea0e0d11998..ccca62021f5b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -73,9 +73,10 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, if (!GVA) return false; - if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) { + if (Kind.isBSS() || Kind.isData() || Kind.isCommon()) { Type *Ty = GV->getType()->getElementType(); - return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty)); + return IsInSmallSection( + GV->getParent()->getDataLayout().getTypeAllocSize(Ty)); } return false; @@ -89,7 +90,7 @@ HexagonTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, // Handle Small Section classification here. if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) return SmallBSSSection; - if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind)) + if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind)) return SmallDataSection; // Otherwise, we work the same as ELF. diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp new file mode 100644 index 000000000000..a05443eb83b8 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -0,0 +1,38 @@ +//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// Hexagon target machine. 
It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetTransformInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagontti" + +TargetTransformInfo::PopcntSupportKind +HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { + // Return Fast Hardware support as every input < 64 bits will be promoted + // to 64 bits. + return TargetTransformInfo::PSK_FastHardware; +} + +// The Hexagon target can unroll loops with run-time trip counts. +void HexagonTTIImpl::getUnrollingPreferences(Loop *L, + TTI::UnrollingPreferences &UP) { + UP.Runtime = UP.Partial = true; +} + +unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const { + return vector ? 0 : 32; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h new file mode 100644 index 000000000000..71ae17a19e5f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -0,0 +1,70 @@ +//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// Hexagon target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> { + typedef BasicTTIImplBase<HexagonTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const HexagonSubtarget *ST; + const HexagonTargetLowering *TLI; + + const HexagonSubtarget *getST() const { return ST; } + const HexagonTargetLowering *getTLI() const { return TLI; } + +public: + explicit HexagonTTIImpl(const HexagonTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + HexagonTTIImpl(const HexagonTTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + HexagonTTIImpl(HexagonTTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + + /// \name Scalar TTI Implementations + /// @{ + + TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; + + // The Hexagon target can unroll loops with run-time trip counts. 
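+  // (The implementation sets UP.Runtime and UP.Partial to true, enabling
+  //  both runtime and partial unrolling of such loops.)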
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool vector) const; + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index b91a3f6f8c6c..81850548bb6e 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -16,35 +16,19 @@ // prune the dependence. // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/DFAPacketizer.h" -#include "Hexagon.h" -#include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "HexagonVLIWPacketizer.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <map> #include <vector> @@ -52,9 +36,22 @@ using namespace llvm; #define DEBUG_TYPE "packets" +static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon packetizer pass")); + static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles", - cl::ZeroOrMore, cl::Hidden, cl::init(true), - cl::desc("Allow non-solo packetization of volatile memory references")); + cl::ZeroOrMore, cl::Hidden, cl::init(true), + cl::desc("Allow non-solo packetization of volatile memory references")); + +static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC")); + +static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores", + cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Disable vector double new-value-stores")); + +extern cl::opt<bool> ScheduleInlineAsm; namespace llvm { FunctionPass *createHexagonPacketizer(); @@ -64,7 +61,6 @@ namespace llvm { namespace { class HexagonPacketizer : public MachineFunctionPass { - public: static char ID; HexagonPacketizer() : MachineFunctionPass(ID) { @@ -73,103 +69,25 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<MachineBranchProbabilityInfo>(); - AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); AU.addPreserved<MachineLoopInfo>(); 
MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const override { return "Hexagon Packetizer"; } - bool runOnMachineFunction(MachineFunction &Fn) override; - }; - char HexagonPacketizer::ID = 0; - - class HexagonPacketizerList : public VLIWPacketizerList { private: - - // Has the instruction been promoted to a dot-new instruction. - bool PromotedToDotNew; - - // Has the instruction been glued to allocframe. - bool GlueAllocframeStore; - - // Has the feeder instruction been glued to new value jump. - bool GlueToNewValueJump; - - // Check if there is a dependence between some instruction already in this - // packet and this instruction. - bool Dependence; - - // Only check for dependence if there are resources available to - // schedule this instruction. - bool FoundSequentialDependence; - - /// \brief A handle to the branch probability pass. - const MachineBranchProbabilityInfo *MBPI; - - // Track MIs with ignored dependece. - std::vector<MachineInstr*> IgnoreDepMIs; - - public: - // Ctor. - HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - const MachineBranchProbabilityInfo *MBPI); - - // initPacketizerState - initialize some internal flags. - void initPacketizerState() override; - - // ignorePseudoInstruction - Ignore bundling of pseudo instructions. - bool ignorePseudoInstruction(MachineInstr *MI, - MachineBasicBlock *MBB) override; - - // isSoloInstruction - return true if instruction MI can not be packetized - // with any other instruction, which means that MI itself is a packet. - bool isSoloInstruction(MachineInstr *MI) override; - - // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ - // together. - bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override; - - // isLegalToPruneDependencies - Is it legal to prune dependece between SUI - // and SUJ. 
- bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override; - - MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override; - private: - bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg); - bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType, - MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC); - bool CanPromoteToDotNew(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit, - MachineBasicBlock::iterator &MII, - const TargetRegisterClass *RC); - bool - CanPromoteToNewValue(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit, - MachineBasicBlock::iterator &MII); - bool CanPromoteToNewValueStore( - MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit); - bool DemoteToDotOld(MachineInstr *MI); - bool ArePredicatesComplements( - MachineInstr *MI1, MachineInstr *MI2, - const std::map<MachineInstr *, SUnit *> &MIToSUnit); - bool RestrictingDepExistInPacket(MachineInstr *, unsigned, - const std::map<MachineInstr *, SUnit *> &); - bool isNewifiable(MachineInstr* MI); - bool isCondInst(MachineInstr* MI); - bool tryAllocateResourcesForConstExt(MachineInstr* MI); - bool canReserveResourcesForConstExt(MachineInstr *MI); - void reserveResourcesForConstExt(MachineInstr* MI); - bool isNewValueInst(MachineInstr* MI); + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; }; + + char HexagonPacketizer::ID = 0; } INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", @@ -177,26 +95,93 @@ INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", false, false) -// HexagonPacketizerList Ctor. -HexagonPacketizerList::HexagonPacketizerList( - MachineFunction &MF, MachineLoopInfo &MLI, - const MachineBranchProbabilityInfo *MBPI) - : VLIWPacketizerList(MF, MLI, true) { - this->MBPI = MBPI; +HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, + MachineLoopInfo &MLI, AliasAnalysis *AA, + const MachineBranchProbabilityInfo *MBPI) + : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) { + HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); } -bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { - const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); - MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); - const MachineBranchProbabilityInfo *MBPI = - &getAnalysis<MachineBranchProbabilityInfo>(); +// Check if FirstI modifies a register that SecondI reads. 
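// [Editor's note, not part of the patch] The helper defined below reduces to:
// does any register defined by the first instruction appear among the uses of
// the second? A self-contained sketch with a toy operand model (the Instr
// type is illustrative, not the LLVM MachineInstr API):

#include <algorithm>
#include <vector>

struct Instr {
  std::vector<unsigned> Defs; // registers the instruction writes
  std::vector<unsigned> Uses; // registers the instruction reads
};

static bool writesWhatOtherReads(const Instr &First, const Instr &Second) {
  for (unsigned R : First.Defs)
    if (std::find(Second.Uses.begin(), Second.Uses.end(), R) !=
        Second.Uses.end())
      return true; // Second must stay after First
  return false;
}

// E.g. First{Defs:{1}}, Second{Uses:{1,2}} gives true; this is why an inline
// asm that writes a register the bundle reads is placed after the bundle.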
+static bool hasWriteToReadDep(const MachineInstr *FirstI, + const MachineInstr *SecondI, const TargetRegisterInfo *TRI) { + for (auto &MO : FirstI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (SecondI->readsRegister(R, TRI)) + return true; + } + return false; +} + + +static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, + MachineBasicBlock::iterator BundleIt, bool Before) { + MachineBasicBlock::instr_iterator InsertPt; + if (Before) + InsertPt = BundleIt.getInstrIterator(); + else + InsertPt = std::next(BundleIt).getInstrIterator(); + + MachineBasicBlock &B = *MI->getParent(); + // The instruction should at least be bundled with the preceding instruction + // (there will always be one, i.e. BUNDLE, if nothing else). + assert(MI->isBundledWithPred()); + if (MI->isBundledWithSucc()) { + MI->clearFlag(MachineInstr::BundledSucc); + MI->clearFlag(MachineInstr::BundledPred); + } else { + // If it's not bundled with the successor (i.e. it is the last one + // in the bundle), then we can simply unbundle it from the predecessor, + // which will take care of updating the predecessor's flag. + MI->unbundleFromPred(); + } + B.splice(InsertPt, &B, MI); + + // Get the size of the bundle without asserting. + MachineBasicBlock::const_instr_iterator I(BundleIt); + MachineBasicBlock::const_instr_iterator E = B.instr_end(); + unsigned Size = 0; + for (++I; I != E && I->isBundledWithPred(); ++I) + ++Size; + + // If there are still two or more instructions, then there is nothing + // else to be done. + if (Size > 1) + return BundleIt; + + // Otherwise, extract the single instruction out and delete the bundle. + MachineBasicBlock::iterator NextIt = std::next(BundleIt); + MachineInstr *SingleI = BundleIt->getNextNode(); + SingleI->unbundleFromPred(); + assert(!SingleI->isBundledWithSucc()); + BundleIt->eraseFromParent(); + return NextIt; +} + + +bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { + if (DisablePacketizer) + return false; + + HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + auto &MLI = getAnalysis<MachineLoopInfo>(); + auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + + if (EnableGenAllInsnClass) + HII->genAllInsnTimingClasses(MF); + // Instantiate the packetizer. - HexagonPacketizerList Packetizer(Fn, MLI, MBPI); + HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI); // DFA state table should not be empty. assert(Packetizer.getResourceTracker() && "Empty DFA table!"); @@ -211,162 +196,107 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { // dependence between Insn 0 and Insn 2. This can lead to incorrect // packetization // - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) { - MachineBasicBlock::iterator End = MBB->end(); - MachineBasicBlock::iterator MI = MBB->begin(); + for (auto &MB : MF) { + auto End = MB.end(); + auto MI = MB.begin(); while (MI != End) { + auto NextI = std::next(MI); if (MI->isKill()) { - MachineBasicBlock::iterator DeleteMI = MI; - ++MI; - MBB->erase(DeleteMI); - End = MBB->end(); - continue; + MB.erase(MI); + End = MB.end(); } - ++MI; + MI = NextI; } } // Loop over all of the basic blocks. - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) { - // Find scheduling regions and schedule / packetize each region. 
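// [Editor's note, not part of the patch] The code being deleted around this
// point formed scheduling regions by scanning backward from the block end;
// the replacement later in this hunk scans forward between scheduling
// boundaries instead. Its shape, over any iterator range (IsBoundary and
// Packetize are assumed callables):

template <typename Iter, typename BoundaryFn, typename RegionFn>
static void forEachRegion(Iter Begin, Iter End, BoundaryFn IsBoundary,
                          RegionFn Packetize) {
  while (Begin != End) {
    Iter RB = Begin;
    while (RB != End && IsBoundary(*RB))  // skip leading boundaries
      ++RB;
    Iter RE = RB;
    while (RE != End && !IsBoundary(*RE)) // grow to the next boundary
      ++RE;
    if (RE != End)                        // keep that boundary in the region
      ++RE;
    if (RB != End)
      Packetize(RB, RE);                  // packetize [RB, RE)
    Begin = RE;
  }
}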
-    unsigned RemainingCount = MBB->size();
-    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
-        RegionEnd != MBB->begin();) {
-      // The next region starts above the previous region. Look backward in the
-      // instruction stream until we find the nearest boundary.
-      MachineBasicBlock::iterator I = RegionEnd;
-      for(;I != MBB->begin(); --I, --RemainingCount) {
-        if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
-          break;
-      }
-      I = MBB->begin();
-
-      // Skip empty scheduling regions.
-      if (I == RegionEnd) {
-        RegionEnd = std::prev(RegionEnd);
-        --RemainingCount;
-        continue;
-      }
-      // Skip regions with one instruction.
-      if (I == std::prev(RegionEnd)) {
-        RegionEnd = std::prev(RegionEnd);
-        continue;
-      }
-
-      Packetizer.PacketizeMIs(MBB, I, RegionEnd);
-      RegionEnd = I;
+  for (auto &MB : MF) {
+    auto Begin = MB.begin(), End = MB.end();
+    while (Begin != End) {
+      // Find the first non-boundary starting from the end of the last
+      // scheduling region.
+      MachineBasicBlock::iterator RB = Begin;
+      while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF))
+        ++RB;
+      // Find the first boundary starting from the beginning of the new
+      // region.
+      MachineBasicBlock::iterator RE = RB;
+      while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF))
+        ++RE;
+      // Add the scheduling boundary if it's not block end.
+      if (RE != End)
+        ++RE;
+      // If RB == End, then RE == End.
+      if (RB != End)
+        Packetizer.PacketizeMIs(&MB, RB, RE);
+
+      Begin = RE;
     }
   }
+  Packetizer.unpacketizeSoloInstrs(MF);
   return true;
 }
 
-static bool IsIndirectCall(MachineInstr* MI) {
-  return MI->getOpcode() == Hexagon::J2_callr;
+// Reserve resources for a constant extender. Trigger an assertion if the
+// reservation fails.
+void HexagonPacketizerList::reserveResourcesForConstExt() {
+  if (!tryAllocateResourcesForConstExt(true))
+    llvm_unreachable("Resources not available");
 }
 
-// Reserve resources for constant extender. Trigure an assertion if
-// reservation fail.
-void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
-                                                 MI->getDebugLoc());
-
-  if (ResourceTracker->canReserveResources(PseudoMI)) {
-    ResourceTracker->reserveResources(PseudoMI);
-    MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
-  } else {
-    MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
-    llvm_unreachable("can not reserve resources for constant extender.");
-  }
-  return;
+bool HexagonPacketizerList::canReserveResourcesForConstExt() {
+  return tryAllocateResourcesForConstExt(false);
 }
 
-bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  assert((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
-         "Should only be called for constant extended instructions");
-  MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
-                                                 MI->getDebugLoc());
-  bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
-  MF.DeleteMachineInstr(PseudoMI);
-  return CanReserve;
+// Allocate resources (i.e. 4 bytes) for a constant extender. If successful,
+// return true; otherwise return false.
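// [Editor's note, not part of the patch] The function below folds the old
// canReserveResourcesForConstExt/reserveResourcesForConstExt pair into one
// probe with a Reserve flag. The pattern in miniature, with a toy four-slot
// tracker standing in for the DFA-based ResourceTracker:

struct ToySlotTracker {
  unsigned SlotsLeft = 4; // a Hexagon packet has four slots
  bool canReserve() const { return SlotsLeft != 0; }
  void reserve() { --SlotsLeft; }
};

static bool tryAllocate(ToySlotTracker &T, bool Reserve) {
  bool Avail = T.canReserve(); // probe once
  if (Reserve && Avail)
    T.reserve();               // commit only when asked and possible
  return Avail;
}

// The query form is tryAllocate(T, false); the committing form is
// tryAllocate(T, true), whose failure the caller turns into an assertion.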
+bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { + auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc()); + bool Avail = ResourceTracker->canReserveResources(ExtMI); + if (Reserve && Avail) + ResourceTracker->reserveResources(ExtMI); + MF.DeleteMachineInstr(ExtMI); + return Avail; } -// Allocate resources (i.e. 4 bytes) for constant extender. If succeed, return -// true, otherwise, return false. -bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); - if (ResourceTracker->canReserveResources(PseudoMI)) { - ResourceTracker->reserveResources(PseudoMI); - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); +bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI, + SDep::Kind DepType, unsigned DepReg) { + // Check for LR dependence. + if (DepReg == HRI->getRARegister()) return true; - } else { - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); - return false; - } -} - - -bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI, - SDep::Kind DepType, - unsigned DepReg) { - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); - - // Check for lr dependence - if (DepReg == QRI->getRARegister()) { - return true; - } - if (QII->isDeallocRet(MI)) { - if (DepReg == QRI->getFrameRegister() || - DepReg == QRI->getStackRegister()) + if (HII->isDeallocRet(MI)) + if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister()) return true; - } - // Check if this is a predicate dependence - const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg); - if (RC == &Hexagon::PredRegsRegClass) { + // Check if this is a predicate dependence. + const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg); + if (RC == &Hexagon::PredRegsRegClass) return true; - } - // - // Lastly check for an operand used in an indirect call - // If we had an attribute for checking if an instruction is an indirect call, - // then we could have avoided this relatively brittle implementation of - // IsIndirectCall() - // - // Assumes that the first operand of the CALLr is the function address - // - if (IsIndirectCall(MI) && (DepType == SDep::Data)) { + // Assumes that the first operand of the CALLr is the function address. 
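// [Editor's note, not part of the patch] A schematic of the case guarded by
// the check that follows: with J2_callr, operand 0 holds the call target, so
// a true data dependence on that register is a call dependence:
//
//     r2 = memw(r16+#0)   // produces the call target
//     { callr r2 }        // r2 must be ready before the packet issues, so
//                         // the producer cannot share the call's packet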
+  if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) {
     MachineOperand MO = MI->getOperand(0);
-    if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) {
+    if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg))
       return true;
-    }
   }
   return false;
 }
 
-static bool IsRegDependence(const SDep::Kind DepType) {
-  return (DepType == SDep::Data || DepType == SDep::Anti ||
-          DepType == SDep::Output);
+static bool isRegDependence(const SDep::Kind DepType) {
+  return DepType == SDep::Data || DepType == SDep::Anti ||
+         DepType == SDep::Output;
 }
 
-static bool IsDirectJump(MachineInstr* MI) {
-  return (MI->getOpcode() == Hexagon::J2_jump);
+static bool isDirectJump(const MachineInstr* MI) {
+  return MI->getOpcode() == Hexagon::J2_jump;
 }
 
-static bool IsSchedBarrier(MachineInstr* MI) {
+static bool isSchedBarrier(const MachineInstr* MI) {
   switch (MI->getOpcode()) {
   case Hexagon::Y2_barrier:
     return true;
@@ -374,76 +304,127 @@
   return false;
 }
 
-static bool IsControlFlow(MachineInstr* MI) {
+static bool isControlFlow(const MachineInstr* MI) {
   return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
 }
 
-static bool IsLoopN(MachineInstr *MI) {
-  return (MI->getOpcode() == Hexagon::J2_loop0i ||
-          MI->getOpcode() == Hexagon::J2_loop0r);
-}
 
-/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a
-/// callee-saved register.
-static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
+/// Returns true if the instruction modifies a callee-saved register.
+static bool doesModifyCalleeSavedReg(const MachineInstr *MI,
                                      const TargetRegisterInfo *TRI) {
-  for (const MCPhysReg *CSR =
-           TRI->getCalleeSavedRegs(MI->getParent()->getParent());
-       *CSR; ++CSR) {
-    unsigned CalleeSavedReg = *CSR;
-    if (MI->modifiesRegister(CalleeSavedReg, TRI))
+  const MachineFunction &MF = *MI->getParent()->getParent();
+  for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+    if (MI->modifiesRegister(*CSR, TRI))
       return true;
-  }
   return false;
 }
 
-// Returns true if an instruction can be promoted to .new predicate
-// or new-value store.
-bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  return isCondInst(MI) || QII->mayBeNewStore(MI);
+// TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
+// Returns true if an instruction can be promoted to .new predicate or
+// new-value store.
+bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) {
+  return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI);
 }
 
-bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  const MCInstrDesc& TID = MI->getDesc();
-  // bug 5670: until that is fixed,
-  // this portion is disabled.
-  if ( TID.isConditionalBranch() // && !IsRegisterJump(MI)) ||
-      || QII->isConditionalTransfer(MI)
-      || QII->isConditionalALU32(MI)
-      || QII->isConditionalLoad(MI)
-      || QII->isConditionalStore(MI)) {
-    return true;
-  }
-  return false;
-}
 
+// Promote an instruction to its .cur form.
+// At this time, we have already made a call to canPromoteToDotCur and made
+// sure that it can *indeed* be promoted.
+bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI,
+      SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+      const TargetRegisterClass* RC) {
+  assert(DepType == SDep::Data);
+  int CurOpcode = HII->getDotCurOp(MI);
+  MI->setDesc(HII->get(CurOpcode));
+  return true;
+}
+
+void HexagonPacketizerList::cleanUpDotCur() {
+  MachineInstr *MI = NULL;
+  for (auto BI : CurrentPacketMIs) {
+    DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
+    if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) {
+      MI = BI;
+      continue;
+    }
+    if (MI) {
+      for (auto &MO : BI->operands())
+        if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
          return;
+    }
+  }
+  if (!MI)
+    return;
+  // We did not find a use of the CUR, so de-cur it.
+  MI->setDesc(HII->get(Hexagon::V6_vL32b_ai));
+  DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
+}
+
+// Check to see if an instruction can be dot cur.
+bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
+      const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
+      const TargetRegisterClass *RC) {
+  if (!HII->isV60VectorInstruction(MI))
+    return false;
+  if (!HII->isV60VectorInstruction(MII))
+    return false;
+
+  // Already a dot cur instruction.
+  if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
+    return false;
+
+  if (!HII->mayBeCurLoad(MI))
+    return false;
+
+  // The "cur value" cannot come from inline asm.
+  if (PacketSU->getInstr()->isInlineAsm())
+    return false;
+
+  // Make sure candidate instruction uses cur.
+  DEBUG(dbgs() << "Can we DOT Cur Vector MI\n";
+        MI->dump();
+        dbgs() << "in packet\n";);
+  MachineInstr *MJ = MII;
+  DEBUG(dbgs() << "Checking CUR against "; MJ->dump(););
+  unsigned DestReg = MI->getOperand(0).getReg();
+  bool FoundMatch = false;
+  for (auto &MO : MJ->operands())
+    if (MO.isReg() && MO.getReg() == DestReg)
+      FoundMatch = true;
+  if (!FoundMatch)
+    return false;
+
+  // Check for existing uses of a vector register within the packet which
+  // would be affected by converting a vector load into .cur form.
+  for (auto BI : CurrentPacketMIs) {
+    DEBUG(dbgs() << "packet has "; BI->dump(););
+    if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
+      return false;
+  }
+
+  DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump(););
+  // We can convert the opcode into a .cur.
+  return true;
+}
+
+// Promote an instruction to its .new form. At this time, we have already
+// made a call to canPromoteToDotNew and made sure that it can *indeed* be
+// promoted.
+bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI, + SDep::Kind DepType, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { + assert (DepType == SDep::Data); int NewOpcode; if (RC == &Hexagon::PredRegsRegClass) - NewOpcode = QII->GetDotNewPredOp(MI, MBPI); + NewOpcode = HII->getDotNewPredOp(MI, MBPI); else - NewOpcode = QII->GetDotNewOp(MI); - MI->setDesc(QII->get(NewOpcode)); - + NewOpcode = HII->getDotNewOp(MI); + MI->setDesc(HII->get(NewOpcode)); return true; } -bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - int NewOpcode = QII->GetDotOldOp(MI->getOpcode()); - MI->setDesc(QII->get(NewOpcode)); +bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) { + int NewOpcode = HII->getDotOldOp(MI->getOpcode()); + MI->setDesc(HII->get(NewOpcode)); return true; } @@ -455,175 +436,173 @@ enum PredicateKind { /// Returns true if an instruction is predicated on p0 and false if it's /// predicated on !p0. -static PredicateKind getPredicateSense(MachineInstr* MI, - const HexagonInstrInfo *QII) { - if (!QII->isPredicated(MI)) +static PredicateKind getPredicateSense(const MachineInstr *MI, + const HexagonInstrInfo *HII) { + if (!HII->isPredicated(MI)) return PK_Unknown; - - if (QII->isPredicatedTrue(MI)) + if (HII->isPredicatedTrue(MI)) return PK_True; - return PK_False; } -static MachineOperand& GetPostIncrementOperand(MachineInstr *MI, - const HexagonInstrInfo *QII) { - assert(QII->isPostIncrement(MI) && "Not a post increment operation."); +static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI, + const HexagonInstrInfo *HII) { + assert(HII->isPostIncrement(MI) && "Not a post increment operation."); #ifndef NDEBUG // Post Increment means duplicates. Use dense map to find duplicates in the // list. Caution: Densemap initializes with the minimum of 64 buckets, // whereas there are at most 5 operands in the post increment. - DenseMap<unsigned, unsigned> DefRegsSet; - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) - if (MI->getOperand(opNum).isReg() && - MI->getOperand(opNum).isDef()) { - DefRegsSet[MI->getOperand(opNum).getReg()] = 1; - } - - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) - if (MI->getOperand(opNum).isReg() && - MI->getOperand(opNum).isUse()) { - if (DefRegsSet[MI->getOperand(opNum).getReg()]) { - return MI->getOperand(opNum); - } - } + DenseSet<unsigned> DefRegsSet; + for (auto &MO : MI->operands()) + if (MO.isReg() && MO.isDef()) + DefRegsSet.insert(MO.getReg()); + + for (auto &MO : MI->operands()) + if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg())) + return MO; #else - if (MI->getDesc().mayLoad()) { + if (MI->mayLoad()) { + const MachineOperand &Op1 = MI->getOperand(1); // The 2nd operand is always the post increment operand in load. - assert(MI->getOperand(1).isReg() && - "Post increment operand has be to a register."); - return (MI->getOperand(1)); + assert(Op1.isReg() && "Post increment operand has be to a register."); + return Op1; } if (MI->getDesc().mayStore()) { + const MachineOperand &Op0 = MI->getOperand(0); // The 1st operand is always the post increment operand in store. - assert(MI->getOperand(0).isReg() && - "Post increment operand has be to a register."); - return (MI->getOperand(0)); + assert(Op0.isReg() && "Post increment operand has be to a register."); + return Op0; } #endif // we should never come here. 
llvm_unreachable("mayLoad or mayStore not set for Post Increment operation"); } -// get the value being stored -static MachineOperand& GetStoreValueOperand(MachineInstr *MI) { +// Get the value being stored. +static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) { // value being stored is always the last operand. - return (MI->getOperand(MI->getNumOperands()-1)); + return MI->getOperand(MI->getNumOperands()-1); +} + +static bool isLoadAbsSet(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::L4_loadrd_ap: + case Hexagon::L4_loadrb_ap: + case Hexagon::L4_loadrh_ap: + case Hexagon::L4_loadrub_ap: + case Hexagon::L4_loadruh_ap: + case Hexagon::L4_loadri_ap: + return true; + } + return false; } -// can be new value store? +static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) { + assert(isLoadAbsSet(MI)); + return MI->getOperand(1); +} + + +// Can be new value store? // Following restrictions are to be respected in convert a store into // a new value store. // 1. If an instruction uses auto-increment, its address register cannot // be a new-value register. Arch Spec 5.4.2.1 -// 2. If an instruction uses absolute-set addressing mode, -// its address register cannot be a new-value register. -// Arch Spec 5.4.2.1.TODO: This is not enabled as -// as absolute-set address mode patters are not implemented. +// 2. If an instruction uses absolute-set addressing mode, its address +// register cannot be a new-value register. Arch Spec 5.4.2.1. // 3. If an instruction produces a 64-bit result, its registers cannot be used // as new-value registers. Arch Spec 5.4.2.2. -// 4. If the instruction that sets a new-value register is conditional, then +// 4. If the instruction that sets the new-value register is conditional, then // the instruction that uses the new-value register must also be conditional, // and both must always have their predicates evaluate identically. // Arch Spec 5.4.2.3. -// 5. There is an implied restriction of a packet can not have another store, -// if there is a new value store in the packet. Corollary, if there is +// 5. There is an implied restriction that a packet cannot have another store, +// if there is a new value store in the packet. Corollary: if there is // already a store in a packet, there can not be a new value store. // Arch Spec: 3.4.4.2 -bool HexagonPacketizerList::CanPromoteToNewValueStore( - MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; +bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, + const MachineInstr *PacketMI, unsigned DepReg) { // Make sure we are looking at the store, that can be promoted. - if (!QII->mayBeNewStore(MI)) + if (!HII->mayBeNewStore(MI)) return false; - // Make sure there is dependency and can be new value'ed - if (GetStoreValueOperand(MI).isReg() && - GetStoreValueOperand(MI).getReg() != DepReg) + // Make sure there is dependency and can be new value'd. 
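// [Editor's note, not part of the patch] Two schematic packets for the
// numbered restrictions above (Hexagon syntax; registers are arbitrary):
//
//     {                          // legal new-value store
//       r2 = add(r7, #12)       // 32-bit producer in the same packet
//       memw(r5+#8) = r2.new
//     }
//
//     {                          // rejected by restriction 1
//       r3 = memw(r2++#4)       // r2 is an auto-incremented address register
//       memw(r6+#0) = r2.new    // ...so it cannot be the new-value source
//     }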
+ const MachineOperand &Val = getStoreValueOperand(MI); + if (Val.isReg() && Val.getReg() != DepReg) return false; - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); const MCInstrDesc& MCID = PacketMI->getDesc(); - // first operand is always the result - - const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI, MF); - - // if there is already an store in the packet, no can do new value store - // Arch Spec 3.4.4.2. - for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), - VE = CurrentPacketMIs.end(); - (VI != VE); ++VI) { - SUnit *PacketSU = MIToSUnit.find(*VI)->second; - if (PacketSU->getInstr()->getDesc().mayStore() || - // if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME, - // then we don't need this - PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe || - PacketSU->getInstr()->getOpcode() == Hexagon::L2_deallocframe) - return false; - } - if (PacketRC == &Hexagon::DoubleRegsRegClass) { - // new value store constraint: double regs can not feed into new value store - // arch spec section: 5.4.2.2 + // First operand is always the result. + const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF); + // Double regs can not feed into new value store: PRM section: 5.4.2.2. + if (PacketRC == &Hexagon::DoubleRegsRegClass) return false; + + // New-value stores are of class NV (slot 0), dual stores require class ST + // in slot 0 (PRM 5.5). + for (auto I : CurrentPacketMIs) { + SUnit *PacketSU = MIToSUnit.find(I)->second; + if (PacketSU->getInstr()->mayStore()) + return false; } // Make sure it's NOT the post increment register that we are going to // new value. - if (QII->isPostIncrement(MI) && - MI->getDesc().mayStore() && - GetPostIncrementOperand(MI, QII).getReg() == DepReg) { + if (HII->isPostIncrement(MI) && + getPostIncrementOperand(MI, HII).getReg() == DepReg) { return false; } - if (QII->isPostIncrement(PacketMI) && - PacketMI->getDesc().mayLoad() && - GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) { - // if source is post_inc, or absolute-set addressing, - // it can not feed into new value store - // r3 = memw(r2++#4) - // memw(r30 + #-1404) = r2.new -> can not be new value store - // arch spec section: 5.4.2.1 + if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() && + getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) { + // If source is post_inc, or absolute-set addressing, it can not feed + // into new value store + // r3 = memw(r2++#4) + // memw(r30 + #-1404) = r2.new -> can not be new value store + // arch spec section: 5.4.2.1. return false; } + if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg) + return false; + // If the source that feeds the store is predicated, new value store must // also be predicated. - if (QII->isPredicated(PacketMI)) { - if (!QII->isPredicated(MI)) + if (HII->isPredicated(PacketMI)) { + if (!HII->isPredicated(MI)) return false; // Check to make sure that they both will have their predicates - // evaluate identically + // evaluate identically. 
unsigned predRegNumSrc = 0; unsigned predRegNumDst = 0; const TargetRegisterClass* predRegClass = nullptr; - // Get predicate register used in the source instruction - for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) { - if ( PacketMI->getOperand(opNum).isReg()) - predRegNumSrc = PacketMI->getOperand(opNum).getReg(); - predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc); - if (predRegClass == &Hexagon::PredRegsRegClass) { + // Get predicate register used in the source instruction. + for (auto &MO : PacketMI->operands()) { + if (!MO.isReg()) + continue; + predRegNumSrc = MO.getReg(); + predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc); + if (predRegClass == &Hexagon::PredRegsRegClass) break; - } } - assert ((predRegClass == &Hexagon::PredRegsRegClass ) && - ("predicate register not found in a predicated PacketMI instruction")); - - // Get predicate register used in new-value store instruction - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { - if ( MI->getOperand(opNum).isReg()) - predRegNumDst = MI->getOperand(opNum).getReg(); - predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst); - if (predRegClass == &Hexagon::PredRegsRegClass) { + assert((predRegClass == &Hexagon::PredRegsRegClass) && + "predicate register not found in a predicated PacketMI instruction"); + + // Get predicate register used in new-value store instruction. + for (auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + predRegNumDst = MO.getReg(); + predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst); + if (predRegClass == &Hexagon::PredRegsRegClass) break; - } } - assert ((predRegClass == &Hexagon::PredRegsRegClass ) && - ("predicate register not found in a predicated MI instruction")); + assert((predRegClass == &Hexagon::PredRegsRegClass) && + "predicate register not found in a predicated MI instruction"); // New-value register producer and user (store) need to satisfy these // constraints: @@ -632,13 +611,11 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( // should also be .new predicated and if producer is not .new predicated // then store should not be .new predicated. // 3) Both new-value register producer and user should have same predicate - // sense, i.e, either both should be negated or both should be none negated. - - if (( predRegNumDst != predRegNumSrc) || - QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) || - getPredicateSense(MI, QII) != getPredicateSense(PacketMI, QII)) { + // sense, i.e, either both should be negated or both should be non-negated. + if (predRegNumDst != predRegNumSrc || + HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) || + getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII)) return false; - } } // Make sure that other than the new-value register no other store instruction @@ -649,81 +626,77 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( // including PacketMI. Howerver, we need to perform the check for the // remaining instructions in the packet. - std::vector<MachineInstr*>::iterator VI; - std::vector<MachineInstr*>::iterator VE; unsigned StartCheck = 0; - for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end(); - (VI != VE); ++VI) { - SUnit *TempSU = MIToSUnit.find(*VI)->second; + for (auto I : CurrentPacketMIs) { + SUnit *TempSU = MIToSUnit.find(I)->second; MachineInstr* TempMI = TempSU->getInstr(); // Following condition is true for all the instructions until PacketMI is // reached (StartCheck is set to 0 before the for loop). 
     // StartCheck flag is 1 for all the instructions after PacketMI.
-    if (TempMI != PacketMI && !StartCheck) // start processing only after
-      continue;                            // encountering PacketMI
+    if (TempMI != PacketMI && !StartCheck) // Start processing only after
+      continue;                            //   encountering PacketMI.
     StartCheck = 1;
-    if (TempMI == PacketMI) // We don't want to check PacketMI for dependence
+    if (TempMI == PacketMI) // We don't want to check PacketMI for dependence.
       continue;
 
-    for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
-      if (MI->getOperand(opNum).isReg() &&
-          TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(),
-                                               QRI))
+    for (auto &MO : MI->operands())
+      if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
         return false;
-    }
   }
 
   // Make sure that for non-POST_INC stores:
   // 1. The only use of reg is DepReg and no other registers.
   //    This handles V4 base+index registers.
   //    The following store can not be dot new.
-  //    Eg. r0 = add(r0, #3)a
+  //    Eg. r0 = add(r0, #3)
   //    memw(r1+r0<<#2) = r0
-  if (!QII->isPostIncrement(MI) &&
-      GetStoreValueOperand(MI).isReg() &&
-      GetStoreValueOperand(MI).getReg() == DepReg) {
-    for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
-      if (MI->getOperand(opNum).isReg() &&
-          MI->getOperand(opNum).getReg() == DepReg) {
-        return false;
-      }
-    }
-    // 2. If data definition is because of implicit definition of the register,
-    //    do not newify the store. Eg.
-    //    %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
-    //    STrih_indexed %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
-    for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
-      if (PacketMI->getOperand(opNum).isReg() &&
-          PacketMI->getOperand(opNum).getReg() == DepReg &&
-          PacketMI->getOperand(opNum).isDef() &&
-          PacketMI->getOperand(opNum).isImplicit()) {
+  if (!HII->isPostIncrement(MI)) {
+    for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
+      const MachineOperand &MO = MI->getOperand(opNum);
+      if (MO.isReg() && MO.getReg() == DepReg)
         return false;
-      }
     }
   }
 
+  // If data definition is because of implicit definition of the register,
+  // do not newify the store. Eg.
+  // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
+  // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
+  for (auto &MO : PacketMI->operands()) {
+    if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
+      continue;
+    unsigned R = MO.getReg();
+    if (R == DepReg || HRI->isSuperRegister(DepReg, R))
      return false;
+  }
+
+  // Handle the imp-use of a super register case. There is a target-independent
+  // change that should prevent this situation, but handle it here just in
+  // case. For example, we cannot newify R2 in the following case:
+  // %R3<def> = A2_tfrsi 0;
+  // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>;
+  for (auto &MO : MI->operands()) {
+    if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
      return false;
+  }
+
   // Can be dot new store.
   return true;
 }
 
-// can this MI to promoted to either
-// new value store or new value jump
-bool HexagonPacketizerList::CanPromoteToNewValue(
-    MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
-    const std::map<MachineInstr *, SUnit *> &MIToSUnit,
-    MachineBasicBlock::iterator &MII) {
-
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  if (!QII->mayBeNewStore(MI))
+// Can this MI be promoted to either a new-value store or a new-value jump?
+bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI, + const SUnit *PacketSU, unsigned DepReg, + MachineBasicBlock::iterator &MII) { + if (!HII->mayBeNewStore(MI)) return false; - MachineInstr *PacketMI = PacketSU->getInstr(); - // Check to see the store can be new value'ed. - if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit)) + MachineInstr *PacketMI = PacketSU->getInstr(); + if (canPromoteToNewValueStore(MI, PacketMI, DepReg)) return true; // Check to see the compare/jump can be new value'ed. @@ -731,93 +704,110 @@ bool HexagonPacketizerList::CanPromoteToNewValue( return false; } +static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) { + for (auto &MO : I->operands()) + if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit()) + return true; + return false; +} + // Check to see if an instruction can be dot new // There are three kinds. // 1. dot new on predicate - V2/V3/V4 // 2. dot new on stores NV/ST - V4 // 3. dot new on jump NV/J - V4 -- This is generated in a pass. -bool HexagonPacketizerList::CanPromoteToDotNew( - MachineInstr *MI, SUnit *PacketSU, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit, - MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; +bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI, + const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { // Already a dot new instruction. - if (QII->isDotNewInst(MI) && !QII->mayBeNewStore(MI)) + if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI)) return false; if (!isNewifiable(MI)) return false; + const MachineInstr *PI = PacketSU->getInstr(); + + // The "new value" cannot come from inline asm. + if (PI->isInlineAsm()) + return false; + + // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no + // sense. + if (PI->isImplicitDef()) + return false; + + // If dependency is trough an implicitly defined register, we should not + // newify the use. + if (isImplicitDependency(PI, DepReg)) + return false; + + const MCInstrDesc& MCID = PI->getDesc(); + const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF); + if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass) + return false; + // predicate .new - if (RC == &Hexagon::PredRegsRegClass && isCondInst(MI)) - return true; - else if (RC != &Hexagon::PredRegsRegClass && - !QII->mayBeNewStore(MI)) // MI is not a new-value store + // bug 5670: until that is fixed + // TODO: MI->isIndirectBranch() and IsRegisterJump(MI) + if (RC == &Hexagon::PredRegsRegClass) + if (HII->isCondInst(MI) || MI->isReturn()) + return HII->predCanBeUsedAsDotNew(PI, DepReg); + + if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI)) + return false; + + // Create a dot new machine instruction to see if resources can be + // allocated. If not, bail out now. + int NewOpcode = HII->getDotNewOp(MI); + const MCInstrDesc &D = HII->get(NewOpcode); + MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc()); + bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI); + MF.DeleteMachineInstr(NewMI); + if (!ResourcesAvailable) + return false; + + // New Value Store only. New Value Jump generated as a separate pass. + if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII)) return false; - else { - // Create a dot new machine instruction to see if resources can be - // allocated. If not, bail out now. 
- int NewOpcode = QII->GetDotNewOp(MI); - const MCInstrDesc &desc = QII->get(NewOpcode); - DebugLoc dl; - MachineInstr *NewMI = - MI->getParent()->getParent()->CreateMachineInstr(desc, dl); - bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI); - MI->getParent()->getParent()->DeleteMachineInstr(NewMI); - - if (!ResourcesAvailable) - return false; - // new value store only - // new new value jump generated as a passes - if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) { - return false; - } - } return true; } -// Go through the packet instructions and search for anti dependency -// between them and DepReg from MI -// Consider this case: +// Go through the packet instructions and search for an anti dependency between +// them and DepReg from MI. Consider this case: // Trying to add // a) %R1<def> = TFRI_cdNotPt %P3, 2 // to this packet: // { -// b) %P0<def> = OR_pp %P3<kill>, %P0<kill> -// c) %P3<def> = TFR_PdRs %R23 -// d) %R1<def> = TFRI_cdnPt %P3, 4 +// b) %P0<def> = C2_or %P3<kill>, %P0<kill> +// c) %P3<def> = C2_tfrrp %R23 +// d) %R1<def> = C2_cmovenewit %P3, 4 // } // The P3 from a) and d) will be complements after // a)'s P3 is converted to .new form -// Anti Dep between c) and b) is irrelevant for this case -bool HexagonPacketizerList::RestrictingDepExistInPacket( - MachineInstr *MI, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit) { - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; +// Anti-dep between c) and b) is irrelevant for this case +bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI, + unsigned DepReg) { SUnit *PacketSUDep = MIToSUnit.find(MI)->second; - for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(), - VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) { - + for (auto I : CurrentPacketMIs) { // We only care for dependencies to predicated instructions - if(!QII->isPredicated(*VIN)) continue; + if (!HII->isPredicated(I)) + continue; // Scheduling Unit for current insn in the packet - SUnit *PacketSU = MIToSUnit.find(*VIN)->second; + SUnit *PacketSU = MIToSUnit.find(I)->second; - // Look at dependencies between current members of the packet - // and predicate defining instruction MI. - // Make sure that dependency is on the exact register - // we care about. + // Look at dependencies between current members of the packet and + // predicate defining instruction MI. Make sure that dependency is + // on the exact register we care about. if (PacketSU->isSucc(PacketSUDep)) { for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { - if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) && - (PacketSU->Succs[i].getKind() == SDep::Anti) && - (PacketSU->Succs[i].getReg() == DepReg)) { + auto &Dep = PacketSU->Succs[i]; + if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti && + Dep.getReg() == DepReg) return true; - } } } } @@ -831,276 +821,362 @@ static unsigned getPredicatedRegister(MachineInstr *MI, const HexagonInstrInfo *QII) { /// We use the following rule: The first predicate register that is a use is /// the predicate register of a predicated instruction. 
-  assert(QII->isPredicated(MI) && "Must be predicated instruction");
-  for (MachineInstr::mop_iterator OI = MI->operands_begin(),
-       OE = MI->operands_end(); OI != OE; ++OI) {
-    MachineOperand &Op = *OI;
+  for (auto &Op : MI->operands()) {
     if (Op.isReg() && Op.getReg() && Op.isUse() &&
         Hexagon::PredRegsRegClass.contains(Op.getReg()))
       return Op.getReg();
   }
 
   llvm_unreachable("Unknown instruction operand layout");
-  return 0;
 }
 
 // Given two predicated instructions, this function detects whether
-// the predicates are complements
-bool HexagonPacketizerList::ArePredicatesComplements(
-    MachineInstr *MI1, MachineInstr *MI2,
-    const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
-
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-
+// the predicates are complements.
+bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1,
+      MachineInstr *MI2) {
   // If we don't know the predicate sense of the instructions bail out early, we
   // need it later.
-  if (getPredicateSense(MI1, QII) == PK_Unknown ||
-      getPredicateSense(MI2, QII) == PK_Unknown)
+  if (getPredicateSense(MI1, HII) == PK_Unknown ||
+      getPredicateSense(MI2, HII) == PK_Unknown)
     return false;
 
-  // Scheduling unit for candidate
-  SUnit *SU = MIToSUnit.find(MI1)->second;
+  // Scheduling unit for candidate.
+  SUnit *SU = MIToSUnit[MI1];
 
   // One corner case deals with the following scenario:
   // Trying to add
-  // a) %R24<def> = TFR_cPt %P0, %R25
+  // a) %R24<def> = A2_tfrt %P0, %R25
   // to this packet:
-  //
   // {
-  // b) %R25<def> = TFR_cNotPt %P0, %R24
-  // c) %P0<def> = CMPEQri %R26, 1
+  // b) %R25<def> = A2_tfrf %P0, %R24
+  // c) %P0<def> = C2_cmpeqi %R26, 1
   // }
   //
-  // On general check a) and b) are complements, but
-  // presence of c) will convert a) to .new form, and
-  // then it is not a complement
-  // We attempt to detect it by analyzing existing
-  // dependencies in the packet
+  // On general check a) and b) are complements, but presence of c) will
+  // convert a) to .new form, and then it is not a complement.
+  // We attempt to detect it by analyzing existing dependencies in the packet.
 
   // Analyze relationships between all existing members of the packet.
-  // Look for Anti dependecy on the same predicate reg
-  // as used in the candidate
-  for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
-       VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
-
-    // Scheduling Unit for current insn in the packet
-    SUnit *PacketSU = MIToSUnit.find(*VIN)->second;
+  // Look for Anti dependency on the same predicate reg as used in the
+  // candidate.
+  for (auto I : CurrentPacketMIs) {
+    // Scheduling Unit for current insn in the packet.
+    SUnit *PacketSU = MIToSUnit.find(I)->second;
 
     // If this instruction in the packet is succeeded by the candidate...
     if (PacketSU->isSucc(SU)) {
       for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
-        // The corner case exist when there is true data
-        // dependency between candidate and one of current
-        // packet members, this dep is on predicate reg, and
-        // there already exist anti dep on the same pred in
+        auto Dep = PacketSU->Succs[i];
+        // The corner case exists when there is true data dependency between
+        // candidate and one of current packet members, this dep is on
+        // predicate reg, and there already exists an anti dep on the same pred
        // in the packet.
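// [Editor's note, not part of the patch] Stripped of the packet-state corner
// case handled here, the complement test at the end of this function is a
// plain comparison, roughly:

struct PredUse {
  unsigned Reg;  // predicate register, p0..p3
  bool Negated;  // predicated on !p rather than p
  bool DotNew;   // uses the .new value of the predicate
};

static bool areComplements(const PredUse &A, const PredUse &B) {
  return A.Reg == B.Reg && A.Negated != B.Negated && A.DotNew == B.DotNew;
}

// So p0 and !p0 are complements, while !p0 and p0.new are not.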
- if (PacketSU->Succs[i].getSUnit() == SU && - PacketSU->Succs[i].getKind() == SDep::Data && - Hexagon::PredRegsRegClass.contains( - PacketSU->Succs[i].getReg()) && - // Here I know that *VIN is predicate setting instruction - // with true data dep to candidate on the register - // we care about - c) in the above example. - // Now I need to see if there is an anti dependency - // from c) to any other instruction in the - // same packet on the pred reg of interest - RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(), - MIToSUnit)) { - return false; + if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data && + Hexagon::PredRegsRegClass.contains(Dep.getReg())) { + // Here I know that I is predicate setting instruction with true + // data dep to candidate on the register we care about - c) in the + // above example. Now I need to see if there is an anti dependency + // from c) to any other instruction in the same packet on the pred + // reg of interest. + if (restrictingDepExistInPacket(I, Dep.getReg())) + return false; } } } } - // If the above case does not apply, check regular - // complement condition. - // Check that the predicate register is the same and - // that the predicate sense is different - // We also need to differentiate .old vs. .new: - // !p0 is not complimentary to p0.new - unsigned PReg1 = getPredicatedRegister(MI1, QII); - unsigned PReg2 = getPredicatedRegister(MI2, QII); - return ((PReg1 == PReg2) && - Hexagon::PredRegsRegClass.contains(PReg1) && - Hexagon::PredRegsRegClass.contains(PReg2) && - (getPredicateSense(MI1, QII) != getPredicateSense(MI2, QII)) && - (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2))); + // If the above case does not apply, check regular complement condition. + // Check that the predicate register is the same and that the predicate + // sense is different We also need to differentiate .old vs. .new: !p0 + // is not complementary to p0.new. + unsigned PReg1 = getPredicatedRegister(MI1, HII); + unsigned PReg2 = getPredicatedRegister(MI2, HII); + return PReg1 == PReg2 && + Hexagon::PredRegsRegClass.contains(PReg1) && + Hexagon::PredRegsRegClass.contains(PReg2) && + getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) && + HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2); } -// initPacketizerState - Initialize packetizer flags +// Initialize packetizer flags. void HexagonPacketizerList::initPacketizerState() { - Dependence = false; PromotedToDotNew = false; GlueToNewValueJump = false; GlueAllocframeStore = false; FoundSequentialDependence = false; - - return; } -// ignorePseudoInstruction - Ignore bundling of pseudo instructions. -bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI, - MachineBasicBlock *MBB) { +// Ignore bundling of pseudo instructions. +bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI, + const MachineBasicBlock*) { if (MI->isDebugValue()) return true; if (MI->isCFIInstruction()) return false; - // We must print out inline assembly + // We must print out inline assembly. if (MI->isInlineAsm()) return false; - // We check if MI has any functional units mapped to it. - // If it doesn't, we ignore the instruction. + if (MI->isImplicitDef()) + return false; + + // We check if MI has any functional units mapped to it. If it doesn't, + // we ignore the instruction. 
const MCInstrDesc& TID = MI->getDesc(); - unsigned SchedClass = TID.getSchedClass(); - const InstrStage* IS = - ResourceTracker->getInstrItins()->beginStage(SchedClass); + auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass()); unsigned FuncUnits = IS->getUnits(); return !FuncUnits; } -// isSoloInstruction: - Returns true for instructions that must be -// scheduled in their own packet. -bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) { +bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) { if (MI->isEHLabel() || MI->isCFIInstruction()) return true; - if (MI->isInlineAsm()) + // Consider inline asm to not be a solo instruction by default. + // Inline asm will be put in a packet temporarily, but then it will be + // removed, and placed outside of the packet (before or after, depending + // on dependencies). This is to reduce the impact of inline asm as a + // "packet splitting" instruction. + if (MI->isInlineAsm() && !ScheduleInlineAsm) return true; // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints: // trap, pause, barrier, icinva, isync, and syncht are solo instructions. // They must not be grouped with other instructions in a packet. - if (IsSchedBarrier(MI)) + if (isSchedBarrier(MI)) + return true; + + if (HII->isSolo(MI)) + return true; + + if (MI->getOpcode() == Hexagon::A2_nop) return true; return false; } -// isLegalToPacketizeTogether: -// SUI is the current instruction that is out side of the current packet. -// SUJ is the current instruction inside the current packet against which that -// SUI will be packetized. -bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { - MachineInstr *I = SUI->getInstr(); - MachineInstr *J = SUJ->getInstr(); - assert(I && J && "Unable to packetize null instruction!"); - const MCInstrDesc &MCIDI = I->getDesc(); - const MCInstrDesc &MCIDJ = J->getDesc(); +// Quick check if instructions MI and MJ cannot coexist in the same packet. +// Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm", +// but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm". +// For full test call this function twice: +// cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI) +// Doing the test only one way saves the amount of code in this function, +// since every test would need to be repeated with the MI and MJ reversed. +static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ, + const HexagonInstrInfo &HII) { + const MachineFunction *MF = MI->getParent()->getParent(); + if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() && + HII.isHVXMemWithAIndirect(MI, MJ)) + return true; - MachineBasicBlock::iterator II = I; + // An inline asm cannot be together with a branch, because we may not be + // able to remove the asm out after packetizing (i.e. if the asm must be + // moved past the bundle). Similarly, two asms cannot be together to avoid + // complications when determining their relative order outside of a bundle. + if (MI->isInlineAsm()) + return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() || + MJ->isCall() || MJ->isTerminator(); - const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + // "False" really means that the quick check failed to determine if + // I and J cannot coexist. 
+  return false;
+}
 
+// Full, symmetric check.
+bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI,
+      const MachineInstr *MJ) {
+  return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII);
+}
 
+void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
+  for (auto &B : MF) {
+    MachineBasicBlock::iterator BundleIt;
+    MachineBasicBlock::instr_iterator NextI;
+    for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) {
+      NextI = std::next(I);
+      MachineInstr *MI = &*I;
+      if (MI->isBundle())
+        BundleIt = I;
+      if (!MI->isInsideBundle())
+        continue;
+
+      // Decide on where to insert the instruction that we are pulling out.
+      // Debug instructions always go before the bundle, but the placement of
+      // INLINE_ASM depends on potential dependencies. By default, try to
+      // put it before the bundle, but if the asm writes to a register that
+      // other instructions in the bundle read, then we need to place it
+      // after the bundle (to preserve the bundle semantics).
+      bool InsertBeforeBundle;
+      if (MI->isInlineAsm())
+        InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI);
+      else if (MI->isDebugValue())
+        InsertBeforeBundle = true;
+      else
+        continue;
+
+      BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle);
+    }
+  }
+}
 
+// Check if a given instruction is of class "system".
+static bool isSystemInstr(const MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+    case Hexagon::Y2_barrier:
+    case Hexagon::Y2_dcfetchbo:
+      return true;
+  }
+  return false;
+}
 
+bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I,
+      const MachineInstr *J) {
+  // The dependence graph may not include edges between dead definitions,
+  // so without extra checks, we could end up packetizing two instructions
+  // defining the same (dead) register.
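// [Editor's note, not part of the patch] The check that follows, modeled on
// plain containers (UsrOvf stands for Hexagon::USR_OVF, the sticky overflow
// flag, which several instructions in one packet may legitimately set):

#include <set>
#include <vector>

static bool deadDefsCollide(const std::vector<unsigned> &DeadDefsI,
                            const std::vector<unsigned> &DeadDefsJ,
                            unsigned UsrOvf) {
  std::set<unsigned> Seen(DeadDefsI.begin(), DeadDefsI.end());
  for (unsigned R : DeadDefsJ)
    if (R != UsrOvf && Seen.count(R))
      return true; // both dead-define R: keep them in separate packets
  return false;
}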
+ if (I->isCall() || J->isCall()) return false; - } - if (IsLoopN(J) && - (IsDirectJump(I) || MCIDI.isCall() || QII->isDeallocRet(I))) { - Dependence = true; + if (HII->isPredicated(I) || HII->isPredicated(J)) return false; + + BitVector DeadDefs(Hexagon::NUM_TARGET_REGS); + for (auto &MO : I->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isDead()) + continue; + DeadDefs[MO.getReg()] = true; } + for (auto &MO : J->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isDead()) + continue; + unsigned R = MO.getReg(); + if (R != Hexagon::USR_OVF && DeadDefs[R]) + return true; + } + return false; +} + +bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I, + const MachineInstr *J) { + // A save callee-save register function call can only be in a packet + // with instructions that don't write to the callee-save registers. + if ((HII->isSaveCalleeSavedRegsCall(I) && + doesModifyCalleeSavedReg(J, HRI)) || + (HII->isSaveCalleeSavedRegsCall(J) && + doesModifyCalleeSavedReg(I, HRI))) + return true; + + // Two control flow instructions cannot go in the same packet. + if (isControlFlow(I) && isControlFlow(J)) + return true; + + // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot + // contain a speculative indirect jump, + // a new-value compare jump or a dealloc_return. + auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool { + if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI)) + return true; + if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI)) + return true; + return false; + }; + + if (HII->isLoopN(I) && isBadForLoopN(J)) + return true; + if (HII->isLoopN(J) && isBadForLoopN(I)) + return true; + // dealloc_return cannot appear in the same packet as a conditional or // unconditional jump. - if (QII->isDeallocRet(I) && - (MCIDJ.isBranch() || MCIDJ.isCall() || MCIDJ.isBarrier())) { - Dependence = true; - return false; + return HII->isDeallocRet(I) && + (J->isBranch() || J->isCall() || J->isBarrier()); +} + +bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I, + const MachineInstr *J) { + bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J); + bool StoreI = I->mayStore(), StoreJ = J->mayStore(); + if ((SysI && StoreJ) || (SysJ && StoreI)) + return true; + + if (StoreI && StoreJ) { + if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I)) + return true; + } else { + // A memop cannot be in the same packet with another memop or a store. + // Two stores can be together, but here I and J cannot both be stores. + bool MopStI = HII->isMemOp(I) || StoreI; + bool MopStJ = HII->isMemOp(J) || StoreJ; + if (MopStI && MopStJ) + return true; } + return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J)); +} - // V4 allows dual store. But does not allow second store, if the - // first store is not in SLOT0. New value store, new value jump, - // dealloc_return and memop always take SLOT0. - // Arch spec 3.4.4.2 - if (MCIDI.mayStore() && MCIDJ.mayStore() && - (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) { - Dependence = true; +// SUI is the current instruction that is out side of the current packet. +// SUJ is the current instruction inside the current packet against which that +// SUI will be packetized. 
+bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { + MachineInstr *I = SUI->getInstr(); + MachineInstr *J = SUJ->getInstr(); + assert(I && J && "Unable to packetize null instruction!"); + + // Clear IgnoreDepMIs when Packet starts. + if (CurrentPacketMIs.size() == 1) + IgnoreDepMIs.clear(); + + MachineBasicBlock::iterator II = I; + const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + + // Solo instructions cannot go in the packet. + assert(!isSoloInstruction(I) && "Unexpected solo instr!"); + + if (cannotCoexist(I, J)) return false; - } - if ((QII->isMemOp(J) && MCIDI.mayStore()) - || (MCIDJ.mayStore() && QII->isMemOp(I)) - || (QII->isMemOp(J) && QII->isMemOp(I))) { - Dependence = true; + Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J); + if (Dependence) return false; - } - //if dealloc_return - if (MCIDJ.mayStore() && QII->isDeallocRet(I)) { - Dependence = true; + // V4 allows dual stores. It does not allow second store, if the first + // store is not in SLOT0. New value store, new value jump, dealloc_return + // and memop always take SLOT0. Arch spec 3.4.4.2. + Dependence = hasV4SpecificDependence(I, J); + if (Dependence) return false; - } // If an instruction feeds new value jump, glue it. MachineBasicBlock::iterator NextMII = I; ++NextMII; - if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) { + if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) { MachineInstr *NextMI = NextMII; bool secondRegMatch = false; - bool maintainNewValueJump = false; + const MachineOperand &NOp0 = NextMI->getOperand(0); + const MachineOperand &NOp1 = NextMI->getOperand(1); - if (NextMI->getOperand(1).isReg() && - I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) { + if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg()) secondRegMatch = true; - maintainNewValueJump = true; - } - - if (!secondRegMatch && - I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) { - maintainNewValueJump = true; - } - for (std::vector<MachineInstr*>::iterator - VI = CurrentPacketMIs.begin(), - VE = CurrentPacketMIs.end(); - (VI != VE && maintainNewValueJump); ++VI) { - SUnit *PacketSU = MIToSUnit.find(*VI)->second; - - // NVJ can not be part of the dual jump - Arch Spec: section 7.8 - if (PacketSU->getInstr()->getDesc().isCall()) { + for (auto I : CurrentPacketMIs) { + SUnit *PacketSU = MIToSUnit.find(I)->second; + MachineInstr *PI = PacketSU->getInstr(); + // NVJ can not be part of the dual jump - Arch Spec: section 7.8. + if (PI->isCall()) { Dependence = true; break; } - // Validate + // Validate: // 1. Packet does not have a store in it. // 2. If the first operand of the nvj is newified, and the second // operand is also a reg, it (second reg) is not defined in @@ -1108,302 +1184,413 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // 3. If the second operand of the nvj is newified, (which means // first operand is also a reg), first reg is not defined in // the same packet. - if (PacketSU->getInstr()->getDesc().mayStore() || - PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe || - // Check #2. - (!secondRegMatch && NextMI->getOperand(1).isReg() && - PacketSU->getInstr()->modifiesRegister( - NextMI->getOperand(1).getReg(), QRI)) || - // Check #3. 
-        (secondRegMatch &&
-         PacketSU->getInstr()->modifiesRegister(
-                               NextMI->getOperand(0).getReg(), QRI))) {
+      if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
+          HII->isLoopN(PI)) {
+        Dependence = true;
+        break;
+      }
+      // Check #2/#3.
+      const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
+      if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
         Dependence = true;
         break;
       }
     }
-    if (!Dependence)
-      GlueToNewValueJump = true;
-    else
+
+    if (Dependence)
       return false;
+    GlueToNewValueJump = true;
   }
 
-  if (SUJ->isSucc(SUI)) {
-    for (unsigned i = 0;
-         (i < SUJ->Succs.size()) && !FoundSequentialDependence;
-         ++i) {
+  // There is no dependency between a prolog instruction and its successor.
+  if (!SUJ->isSucc(SUI))
+    return true;
 
-      if (SUJ->Succs[i].getSUnit() != SUI) {
-        continue;
-      }
+  for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
+    if (FoundSequentialDependence)
+      break;
 
-      SDep::Kind DepType = SUJ->Succs[i].getKind();
+    if (SUJ->Succs[i].getSUnit() != SUI)
+      continue;
 
-      // For direct calls:
-      // Ignore register dependences for call instructions for
-      // packetization purposes except for those due to r31 and
-      // predicate registers.
-      //
-      // For indirect calls:
-      // Same as direct calls + check for true dependences to the register
-      // used in the indirect call.
-      //
-      // We completely ignore Order dependences for call instructions
-      //
-      // For returns:
-      // Ignore register dependences for return instructions like jumpr,
-      // dealloc return unless we have dependencies on the explicit uses
-      // of the registers used by jumpr (like r31) or dealloc return
-      // (like r29 or r30).
-      //
-      // TODO: Currently, jumpr is handling only return of r31. So, the
-      // following logic (specificaly IsCallDependent) is working fine.
-      // We need to enable jumpr for register other than r31 and then,
-      // we need to rework the last part, where it handles indirect call
-      // of that (IsCallDependent) function. Bug 6216 is opened for this.
-      //
-      unsigned DepReg = 0;
-      const TargetRegisterClass* RC = nullptr;
-      if (DepType == SDep::Data) {
-        DepReg = SUJ->Succs[i].getReg();
-        RC = QRI->getMinimalPhysRegClass(DepReg);
-      }
-      if ((MCIDI.isCall() || MCIDI.isReturn()) &&
-          (!IsRegDependence(DepType) ||
-           !IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) {
-        /* do nothing */
-      }
+    SDep::Kind DepType = SUJ->Succs[i].getKind();
+    // For direct calls:
+    // Ignore register dependences for call instructions for packetization
+    // purposes except for those due to r31 and predicate registers.
+    //
+    // For indirect calls:
+    // Same as direct calls + check for true dependences to the register
+    // used in the indirect call.
+    //
+    // We completely ignore Order dependences for call instructions.
+    //
+    // For returns:
+    // Ignore register dependences for return instructions like jumpr,
+    // dealloc return unless we have dependencies on the explicit uses
+    // of the registers used by jumpr (like r31) or dealloc return
+    // (like r29 or r30).
+    //
+    // TODO: Currently, jumpr is handling only return of r31. So, the
+    // following logic (specifically isCallDependent) is working fine.
+    // We need to enable jumpr for register other than r31 and then,
+    // we need to rework the last part, where it handles indirect call
+    // of that (isCallDependent) function. Bug 6216 is opened for this.
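Editorial aside: the filter described above can be pictured as a small predicate. A hypothetical simplification, not the actual isCallDependent helper (which also inspects the register operand of an indirect call):

    // A dependence against a call/return survives only for registers the
    // call really consumes: the link register r31 and the predicates.
    static bool callKeepsDep(unsigned DepReg, const HexagonRegisterInfo &HRI) {
      return DepReg == HRI.getRARegister() ||            // r31
             Hexagon::PredRegsRegClass.contains(DepReg); // p0..p3
    }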
+    unsigned DepReg = 0;
+    const TargetRegisterClass *RC = nullptr;
+    if (DepType == SDep::Data) {
+      DepReg = SUJ->Succs[i].getReg();
+      RC = HRI->getMinimalPhysRegClass(DepReg);
+    }
 
-      // For instructions that can be promoted to dot-new, try to promote.
-      else if ((DepType == SDep::Data) &&
-               CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) &&
-               PromoteToDotNew(I, DepType, II, RC)) {
-        PromotedToDotNew = true;
-        /* do nothing */
-      }
+    if (I->isCall() || I->isReturn()) {
+      if (!isRegDependence(DepType))
+        continue;
+      if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
+        continue;
+    }
 
-      else if ((DepType == SDep::Data) &&
-               (QII->isNewValueJump(I))) {
-        /* do nothing */
-      }
+    if (DepType == SDep::Data) {
+      if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
+        if (promoteToDotCur(J, DepType, II, RC))
+          continue;
+    }
 
-      // For predicated instructions, if the predicates are complements
-      // then there can be no dependence.
-      else if (QII->isPredicated(I) &&
-               QII->isPredicated(J) &&
-               ArePredicatesComplements(I, J, MIToSUnit)) {
-        /* do nothing */
+    // Data dependence is ok if we have load.cur.
+    if (DepType == SDep::Data && HII->isDotCurInst(J)) {
+      if (HII->isV60VectorInstruction(I))
+        continue;
+    }
+    // For instructions that can be promoted to dot-new, try to promote.
+    if (DepType == SDep::Data) {
+      if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
+        if (promoteToDotNew(I, DepType, II, RC)) {
+          PromotedToDotNew = true;
+          continue;
+        }
       }
-      else if (IsDirectJump(I) &&
-               !MCIDJ.isBranch() &&
-               !MCIDJ.isCall() &&
-               (DepType == SDep::Order)) {
-        // Ignore Order dependences between unconditional direct branches
-        // and non-control-flow instructions
-        /* do nothing */
-      }
-      else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) &&
-               (DepType != SDep::Output)) {
-        // Ignore all dependences for jumps except for true and output
-        // dependences
-        /* do nothing */
-      }
-
-      // Ignore output dependences due to superregs. We can
-      // write to two different subregisters of R1:0 for instance
-      // in the same cycle
-      //
+      if (HII->isNewValueJump(I))
+        continue;
+    }
+    // For predicated instructions, if the predicates are complements then
+    // there can be no dependence.
+    if (HII->isPredicated(I) && HII->isPredicated(J) &&
+        arePredicatesComplements(I, J)) {
+      // Not always safe to do this translation.
+      // DAG Builder attempts to reduce dependence edges using transitive
+      // nature of dependencies. Here is an example:
      //
-      // Let the
-      // If neither I nor J defines DepReg, then this is a
-      // superfluous output dependence. The dependence must be of the
-      // form:
-      //  R0 = ...
-      //  R1 = ...
-      // and there is an output dependence between the two instructions
-      // with
-      //  DepReg = D0
-      // We want to ignore these dependences.
-      // Ideally, the dependence constructor should annotate such
-      // dependences. We can then avoid this relatively expensive check.
+      //  r0 = tfr_pt ... (1)
+      //  r0 = tfr_pf ... (2)
+      //  r0 = tfr_pt ... (3)
      //
-      else if (DepType == SDep::Output) {
-        // DepReg is the register that's responsible for the dependence.
-        unsigned DepReg = SUJ->Succs[i].getReg();
+      // There will be an output dependence between (1)->(2) and (2)->(3).
+      // However, there is no dependence edge between (1)->(3). This results
+      // in all 3 instructions going in the same packet. We ignore the
+      // dependence only once to avoid this situation (sketched below).
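Editorial aside: the lines that follow implement the one-shot ignore just described. A sketch distilled from the tfr_pt/tfr_pf example, with hypothetical helper and argument names:

    // Two predicated transfers are complements when they read the same
    // predicate register with opposite senses.
    static bool areComplements(unsigned PredRegI, bool SenseI,
                               unsigned PredRegJ, bool SenseJ) {
      return PredRegI == PredRegJ && SenseI != SenseJ;
    }
    // Trace: when (1) meets (2), the pair is complementary, so (1) is
    // recorded in IgnoreDepMIs and the edge is ignored; when (2) meets (3),
    // (2) is found in IgnoreDepMIs, the dependence is honored, and the
    // packet is cut, keeping all three writers of r0 out of one packet.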
+      auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J);
+      if (Itr != IgnoreDepMIs.end()) {
+        Dependence = true;
+        return false;
+      }
+      IgnoreDepMIs.push_back(I);
+      continue;
+    }
+
+    // Ignore Order dependences between unconditional direct branches
+    // and non-control-flow instructions.
+    if (isDirectJump(I) && !J->isBranch() && !J->isCall() &&
+        DepType == SDep::Order)
+      continue;
+
+    // Ignore all dependences for jumps except for true and output
+    // dependences.
+    if (I->isConditionalBranch() && DepType != SDep::Data &&
+        DepType != SDep::Output)
+      continue;
+
+    // Ignore output dependences due to superregs. We can write to two
+    // different subregisters of R1:0 for instance in the same cycle.
+
+    // If neither I nor J defines DepReg, then this is a superfluous output
+    // dependence. The dependence must be of the form:
+    //   R0 = ...
+    //   R1 = ...
+    // and there is an output dependence between the two instructions with
+    //   DepReg = D0.
+    // We want to ignore these dependences. Ideally, the dependence
+    // constructor should annotate such dependences. We can then avoid this
+    // relatively expensive check.
+    //
+    if (DepType == SDep::Output) {
+      // DepReg is the register that's responsible for the dependence.
+      unsigned DepReg = SUJ->Succs[i].getReg();
+
+      // Check if I and J really define DepReg.
+      if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg))
+        continue;
+      FoundSequentialDependence = true;
+      break;
+    }
 
-      // Check if I and J really defines DepReg.
-      if (I->definesRegister(DepReg) ||
-          J->definesRegister(DepReg)) {
+    // For Order dependences:
+    // 1. On V4 or later, volatile loads/stores can be packetized together,
+    //    unless other rules prevent it.
+    // 2. Store followed by a load is not allowed.
+    // 3. Store followed by a store is only valid on V4 or later.
+    // 4. Load followed by any memory operation is allowed.
+    if (DepType == SDep::Order) {
+      if (!PacketizeVolatiles) {
+        bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef();
+        if (OrdRefs) {
          FoundSequentialDependence = true;
          break;
        }
      }
-
-      // We ignore Order dependences for
-      // 1. Two loads unless they are volatile.
-      // 2. Two stores in V4 unless they are volatile.
-      else if ((DepType == SDep::Order) &&
-               !I->hasOrderedMemoryRef() &&
-               !J->hasOrderedMemoryRef()) {
-        if (MCIDI.mayStore() && MCIDJ.mayStore()) {
-          /* do nothing */
-        }
-        // store followed by store-- not OK on V2
-        // store followed by load -- not OK on all (OK if addresses
-        // are not aliased)
-        // load followed by store -- OK on all
-        // load followed by load -- OK on all
-        else if ( !MCIDJ.mayStore()) {
-          /* do nothing */
-        }
-        else {
+      // J is first, I is second.
+      bool LoadJ = J->mayLoad(), StoreJ = J->mayStore();
+      bool LoadI = I->mayLoad(), StoreI = I->mayStore();
+      if (StoreJ) {
+        // Two stores are only allowed on V4+. Load following store is never
+        // allowed.
+        if (LoadI) {
          FoundSequentialDependence = true;
          break;
        }
-      }
-
-      // For V4, special case ALLOCFRAME. Even though there is dependency
-      // between ALLOCFRAME and subsequent store, allow it to be
-      // packetized in a same packet. This implies that the store is using
-      // caller's SP. Hence, offset needs to be updated accordingly.
- else if (DepType == SDep::Data - && J->getOpcode() == Hexagon::S2_allocframe - && (I->getOpcode() == Hexagon::S2_storerd_io - || I->getOpcode() == Hexagon::S2_storeri_io - || I->getOpcode() == Hexagon::S2_storerb_io) - && I->getOperand(0).getReg() == QRI->getStackRegister() - && QII->isValidOffset(I->getOpcode(), - I->getOperand(1).getImm() - - (FrameSize + HEXAGON_LRFP_SIZE))) - { - GlueAllocframeStore = true; - // Since this store is to be glued with allocframe in the same - // packet, it will use SP of the previous stack frame, i.e - // caller's SP. Therefore, we need to recalculate offset according - // to this change. - I->getOperand(1).setImm(I->getOperand(1).getImm() - - (FrameSize + HEXAGON_LRFP_SIZE)); - } - - // - // Skip over anti-dependences. Two instructions that are - // anti-dependent can share a packet - // - else if (DepType != SDep::Anti) { + } else if (!LoadJ || (!LoadI && !StoreI)) { + // If J is neither load nor store, assume a dependency. + // If J is a load, but I is neither, also assume a dependency. FoundSequentialDependence = true; break; } + // Store followed by store: not OK on V2. + // Store followed by load: not OK on all. + // Load followed by store: OK on all. + // Load followed by load: OK on all. + continue; } - if (FoundSequentialDependence) { - Dependence = true; - return false; + // For V4, special case ALLOCFRAME. Even though there is dependency + // between ALLOCFRAME and subsequent store, allow it to be packetized + // in a same packet. This implies that the store is using the caller's + // SP. Hence, offset needs to be updated accordingly. + if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) { + unsigned Opc = I->getOpcode(); + switch (Opc) { + case Hexagon::S2_storerd_io: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerb_io: + if (I->getOperand(0).getReg() == HRI->getStackRegister()) { + int64_t Imm = I->getOperand(1).getImm(); + int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE); + if (HII->isValidOffset(Opc, NewOff)) { + GlueAllocframeStore = true; + // Since this store is to be glued with allocframe in the same + // packet, it will use SP of the previous stack frame, i.e. + // caller's SP. Therefore, we need to recalculate offset + // according to this change. + I->getOperand(1).setImm(NewOff); + continue; + } + } + default: + break; + } + } + + // Skip over anti-dependences. Two instructions that are anti-dependent + // can share a packet. + if (DepType != SDep::Anti) { + FoundSequentialDependence = true; + break; } } + if (FoundSequentialDependence) { + Dependence = true; + return false; + } + return true; } -// isLegalToPruneDependencies bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { MachineInstr *I = SUI->getInstr(); - assert(I && SUJ->getInstr() && "Unable to packetize null instruction!"); - - const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); - - if (Dependence) { + MachineInstr *J = SUJ->getInstr(); + assert(I && J && "Unable to packetize null instruction!"); - // Check if the instruction was promoted to a dot-new. If so, demote it - // back into a dot-old. - if (PromotedToDotNew) { - DemoteToDotOld(I); - } + if (cannotCoexist(I, J)) + return false; - // Check if the instruction (must be a store) was glued with an Allocframe - // instruction. If so, restore its offset to its original value, i.e. use - // curent SP instead of caller's SP. 
- if (GlueAllocframeStore) { - I->getOperand(1).setImm(I->getOperand(1).getImm() + - FrameSize + HEXAGON_LRFP_SIZE); - } + if (!Dependence) + return true; - return false; + // Check if the instruction was promoted to a dot-new. If so, demote it + // back into a dot-old. + if (PromotedToDotNew) + demoteToDotOld(I); + + cleanUpDotCur(); + // Check if the instruction (must be a store) was glued with an allocframe + // instruction. If so, restore its offset to its original value, i.e. use + // current SP instead of caller's SP. + if (GlueAllocframeStore) { + unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + MachineOperand &MOff = I->getOperand(1); + MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE); } - return true; + return false; } + MachineBasicBlock::iterator HexagonPacketizerList::addToPacket(MachineInstr *MI) { + MachineBasicBlock::iterator MII = MI; + MachineBasicBlock *MBB = MI->getParent(); + if (MI->isImplicitDef()) { + unsigned R = MI->getOperand(0).getReg(); + if (Hexagon::IntRegsRegClass.contains(R)) { + MCSuperRegIterator S(R, HRI, false); + MI->addOperand(MachineOperand::CreateReg(*S, true, true)); + } + return MII; + } + assert(ResourceTracker->canReserveResources(MI)); + + bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); + bool Good = true; + + if (GlueToNewValueJump) { + MachineInstr *NvjMI = ++MII; + // We need to put both instructions in the same packet: MI and NvjMI. + // Either of them can require a constant extender. Try to add both to + // the current packet, and if that fails, end the packet and start a + // new one. + ResourceTracker->reserveResources(MI); + if (ExtMI) + Good = tryAllocateResourcesForConstExt(true); + + bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI); + if (Good) { + if (ResourceTracker->canReserveResources(NvjMI)) + ResourceTracker->reserveResources(NvjMI); + else + Good = false; + } + if (Good && ExtNvjMI) + Good = tryAllocateResourcesForConstExt(true); - MachineBasicBlock::iterator MII = MI; - MachineBasicBlock *MBB = MI->getParent(); - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - - if (GlueToNewValueJump) { - - ++MII; - MachineInstr *nvjMI = MII; + if (!Good) { + endPacket(MBB, MI); assert(ResourceTracker->canReserveResources(MI)); ResourceTracker->reserveResources(MI); - if ((QII->isExtended(MI) || QII->isConstExtended(MI)) && - !tryAllocateResourcesForConstExt(MI)) { - endPacket(MBB, MI); - ResourceTracker->reserveResources(MI); - assert(canReserveResourcesForConstExt(MI) && - "Ensure that there is a slot"); - reserveResourcesForConstExt(MI); - // Reserve resources for new value jump constant extender. - assert(canReserveResourcesForConstExt(MI) && - "Ensure that there is a slot"); - reserveResourcesForConstExt(nvjMI); - assert(ResourceTracker->canReserveResources(nvjMI) && - "Ensure that there is a slot"); - - } else if ( // Extended instruction takes two slots in the packet. - // Try reserve and allocate 4-byte in the current packet first. - (QII->isExtended(nvjMI) - && (!tryAllocateResourcesForConstExt(nvjMI) - || !ResourceTracker->canReserveResources(nvjMI))) - || // For non-extended instruction, no need to allocate extra 4 bytes. - (!QII->isExtended(nvjMI) && - !ResourceTracker->canReserveResources(nvjMI))) - { - endPacket(MBB, MI); - // A new and empty packet starts. - // We are sure that the resources requirements can be satisfied. - // Therefore, do not need to call "canReserveResources" anymore. 
- ResourceTracker->reserveResources(MI); - if (QII->isExtended(nvjMI)) - reserveResourcesForConstExt(nvjMI); + if (ExtMI) { + assert(canReserveResourcesForConstExt()); + tryAllocateResourcesForConstExt(true); } - // Here, we are sure that "reserveResources" would succeed. - ResourceTracker->reserveResources(nvjMI); - CurrentPacketMIs.push_back(MI); - CurrentPacketMIs.push_back(nvjMI); - } else { - if ( (QII->isExtended(MI) || QII->isConstExtended(MI)) - && ( !tryAllocateResourcesForConstExt(MI) - || !ResourceTracker->canReserveResources(MI))) - { - endPacket(MBB, MI); - // Check if the instruction was promoted to a dot-new. If so, demote it - // back into a dot-old - if (PromotedToDotNew) { - DemoteToDotOld(MI); - } - reserveResourcesForConstExt(MI); + assert(ResourceTracker->canReserveResources(NvjMI)); + ResourceTracker->reserveResources(NvjMI); + if (ExtNvjMI) { + assert(canReserveResourcesForConstExt()); + reserveResourcesForConstExt(); } - // In case that "MI" is not an extended insn, - // the resource availability has already been checked. - ResourceTracker->reserveResources(MI); - CurrentPacketMIs.push_back(MI); } + CurrentPacketMIs.push_back(MI); + CurrentPacketMIs.push_back(NvjMI); return MII; + } + + ResourceTracker->reserveResources(MI); + if (ExtMI && !tryAllocateResourcesForConstExt(true)) { + endPacket(MBB, MI); + if (PromotedToDotNew) + demoteToDotOld(MI); + ResourceTracker->reserveResources(MI); + reserveResourcesForConstExt(); + } + + CurrentPacketMIs.push_back(MI); + return MII; +} + +void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, + MachineInstr *MI) { + OldPacketMIs = CurrentPacketMIs; + VLIWPacketizerList::endPacket(MBB, MI); +} + +bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) { + return !producesStall(MI); +} + + +// Return true when ConsMI uses a register defined by ProdMI. +static bool isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) { + if (!ProdMI->getOperand(0).isReg()) + return false; + unsigned DstReg = ProdMI->getOperand(0).getReg(); + + for (auto &Op : ConsMI->operands()) + if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg) + // The MIs depend on each other. + return true; + + return false; } +// V60 forward scheduling. +bool HexagonPacketizerList::producesStall(const MachineInstr *I) { + // Check whether the previous packet is in a different loop. If this is the + // case, there is little point in trying to avoid a stall because that would + // favor the rare case (loop entry) over the common case (loop iteration). + // + // TODO: We should really be able to check all the incoming edges if this is + // the first packet in a basic block, so we can avoid stalls from the loop + // backedge. + if (!OldPacketMIs.empty()) { + auto *OldBB = OldPacketMIs.front()->getParent(); + auto *ThisBB = I->getParent(); + if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB)) + return false; + } + + // Check for stall between two vector instructions. + if (HII->isV60VectorInstruction(I)) { + for (auto J : OldPacketMIs) { + if (!HII->isV60VectorInstruction(J)) + continue; + if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I)) + return true; + } + return false; + } + + // Check for stall between two scalar instructions. First, check that + // there is no definition of a use in the current packet, because it + // may be a candidate for .new. 
+  for (auto J : CurrentPacketMIs)
+    if (!HII->isV60VectorInstruction(J) && isDependent(J, I))
+      return false;
+
+  // Check for stall between I and instructions in the previous packet.
+  if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) {
+    for (auto J : OldPacketMIs) {
+      if (HII->isV60VectorInstruction(J))
+        continue;
+      if (!HII->isLateInstrFeedsEarlyInstr(J, I))
+        continue;
+      if (isDependent(J, I) && !HII->canExecuteInBundle(J, I))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+
 //===----------------------------------------------------------------------===//
 //                         Public Constructor Functions
 //===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
new file mode 100644
index 000000000000..960cf6ca5bbc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -0,0 +1,114 @@
+#ifndef HEXAGONVLIWPACKETIZER_H
+#define HEXAGONVLIWPACKETIZER_H
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+
+namespace llvm {
+class HexagonPacketizerList : public VLIWPacketizerList {
+  // Vector of instructions assigned to the packet that has just been created.
+  std::vector<MachineInstr*> OldPacketMIs;
+
+  // Has the instruction been promoted to a dot-new instruction.
+  bool PromotedToDotNew;
+
+  // Has the instruction been glued to allocframe.
+  bool GlueAllocframeStore;
+
+  // Has the feeder instruction been glued to new value jump.
+  bool GlueToNewValueJump;
+
+  // Check if there is a dependence between some instruction already in this
+  // packet and this instruction.
+  bool Dependence;
+
+  // Only check for dependence if there are resources available to
+  // schedule this instruction.
+  bool FoundSequentialDependence;
+
+  // Track MIs with ignored dependence.
+  std::vector<MachineInstr*> IgnoreDepMIs;
+
+protected:
+  /// \brief A handle to the branch probability pass.
+  const MachineBranchProbabilityInfo *MBPI;
+  const MachineLoopInfo *MLI;
+
+private:
+  const HexagonInstrInfo *HII;
+  const HexagonRegisterInfo *HRI;
+
+public:
+  // Ctor.
+  HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+                        AliasAnalysis *AA,
+                        const MachineBranchProbabilityInfo *MBPI);
+
+  // initPacketizerState - initialize some internal flags.
+  void initPacketizerState() override;
+
+  // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+  bool ignorePseudoInstruction(const MachineInstr *MI,
+                               const MachineBasicBlock *MBB) override;
+
+  // isSoloInstruction - return true if instruction MI cannot be packetized
+  // with any other instruction, which means that MI itself is a packet.
+  bool isSoloInstruction(const MachineInstr *MI) override;
+
+  // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+  // together.
+  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override;
+
+  // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
+  // and SUJ.
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override; + + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override; + void endPacket(MachineBasicBlock *MBB, MachineInstr *MI) override; + bool shouldAddToPacket(const MachineInstr *MI) override; + + void unpacketizeSoloInstrs(MachineFunction &MF); + +protected: + bool isCallDependent(const MachineInstr* MI, SDep::Kind DepType, + unsigned DepReg); + bool promoteToDotCur(MachineInstr* MI, SDep::Kind DepType, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToDotCur(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + void cleanUpDotCur(); + + bool promoteToDotNew(MachineInstr* MI, SDep::Kind DepType, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToDotNew(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToNewValue(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII); + bool canPromoteToNewValueStore(const MachineInstr* MI, + const MachineInstr* PacketMI, unsigned DepReg); + bool demoteToDotOld(MachineInstr* MI); + bool arePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2); + bool restrictingDepExistInPacket(MachineInstr*, unsigned); + bool isNewifiable(const MachineInstr *MI); + bool isCurifiable(MachineInstr* MI); + bool cannotCoexist(const MachineInstr *MI, const MachineInstr *MJ); + inline bool isPromotedToDotNew() const { + return PromotedToDotNew; + } + bool tryAllocateResourcesForConstExt(bool Reserve); + bool canReserveResourcesForConstExt(); + void reserveResourcesForConstExt(); + bool hasDeadDependence(const MachineInstr *I, const MachineInstr *J); + bool hasControlDependence(const MachineInstr *I, const MachineInstr *J); + bool hasV4SpecificDependence(const MachineInstr *I, const MachineInstr *J); + bool producesStall(const MachineInstr *MI); +}; +} // namespace llvm +#endif // HEXAGONVLIWPACKETIZER_H + diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 99ea2fabf867..b73af8249cb5 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -13,7 +13,9 @@ #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCInstrInfo.h" @@ -33,14 +35,28 @@ class HexagonAsmBackend : public MCAsmBackend { mutable uint64_t relaxedCnt; std::unique_ptr <MCInstrInfo> MCII; std::unique_ptr <MCInst *> RelaxTarget; + MCInst * Extender; public: HexagonAsmBackend(Target const &T, uint8_t OSABI, StringRef CPU) : - OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *){} + OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *), + Extender(nullptr) {} MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createHexagonELFObjectWriter(OS, OSABI, CPU); } + void setExtender(MCContext &Context) const { + if (Extender == nullptr) + const_cast<HexagonAsmBackend *>(this)->Extender = new (Context) MCInst; + } + + MCInst 
*takeExtender() const { + assert(Extender != nullptr); + MCInst * Result = Extender; + const_cast<HexagonAsmBackend *>(this)->Extender = nullptr; + return Result; + } + unsigned getNumFixupKinds() const override { return Hexagon::NumTargetFixupKinds; } @@ -222,6 +238,7 @@ public: if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) { ++relaxedCnt; *RelaxTarget = &MCI; + setExtender(Layout.getAssembler().getContext()); return true; } else { return false; @@ -262,6 +279,7 @@ public: if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) { ++relaxedCnt; *RelaxTarget = &MCI; + setExtender(Layout.getAssembler().getContext()); return true; } } @@ -276,9 +294,35 @@ public: llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced"); } - void relaxInstruction(MCInst const & /*Inst*/, - MCInst & /*Res*/) const override { - llvm_unreachable("relaxInstruction() unimplemented"); + void relaxInstruction(MCInst const & Inst, + MCInst & Res) const override { + assert(HexagonMCInstrInfo::isBundle(Inst) && + "Hexagon relaxInstruction only works on bundles"); + + Res = HexagonMCInstrInfo::createBundle(); + // Copy the results into the bundle. + bool Update = false; + for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) { + MCInst &CrntHMI = const_cast<MCInst &>(*I.getInst()); + + // if immediate extender needed, add it in + if (*RelaxTarget == &CrntHMI) { + Update = true; + assert((HexagonMCInstrInfo::bundleSize(Res) < HEXAGON_PACKET_SIZE) && + "No room to insert extender for relaxation"); + + MCInst *HMIx = takeExtender(); + *HMIx = HexagonMCInstrInfo::deriveExtender( + *MCII, CrntHMI, + HexagonMCInstrInfo::getExtendableOperand(*MCII, CrntHMI)); + Res.addOperand(MCOperand::createInst(HMIx)); + *RelaxTarget = nullptr; + } + // now copy over the original instruction(the one we may have extended) + Res.addOperand(MCOperand::createInst(I.getInst())); + } + (void)Update; + assert(Update && "Didn't find relaxation target"); } bool writeNopData(uint64_t Count, diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index f4d162ccf6a8..47a6f8636276 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -44,6 +44,25 @@ namespace HexagonII { TypeMEMOP = 9, TypeNV = 10, TypeDUPLEX = 11, + TypeCOMPOUND = 12, + TypeCVI_FIRST = 13, + TypeCVI_VA = TypeCVI_FIRST, + TypeCVI_VA_DV = 14, + TypeCVI_VX = 15, + TypeCVI_VX_DV = 16, + TypeCVI_VP = 17, + TypeCVI_VP_VS = 18, + TypeCVI_VS = 19, + TypeCVI_VINLANESAT= 20, + TypeCVI_VM_LD = 21, + TypeCVI_VM_TMP_LD = 22, + TypeCVI_VM_CUR_LD = 23, + TypeCVI_VM_VP_LDU = 24, + TypeCVI_VM_ST = 25, + TypeCVI_VM_NEW_ST = 26, + TypeCVI_VM_STU = 27, + TypeCVI_HIST = 28, + TypeCVI_LAST = TypeCVI_HIST, TypePREFIX = 30, // Such as extenders. TypeENDLOOP = 31 // Such as end of a HW loop. }; @@ -71,12 +90,16 @@ namespace HexagonII { PostInc = 6 // Post increment addressing mode }; + // MemAccessSize is represented as 1+log2(N) where N is size in bits. enum class MemAccessSize { NoMemAccess = 0, // Not a memory acces instruction. ByteAccess = 1, // Byte access instruction (memb). HalfWordAccess = 2, // Half word access instruction (memh). WordAccess = 3, // Word access instruction (memw). - DoubleWordAccess = 4 // Double word access instruction (memd) + DoubleWordAccess = 4, // Double word access instruction (memd) + // 5, // We do not have a 16 byte vector access. 
+ Vector64Access = 7, // 64 Byte vector access instruction (vmem). + Vector128Access = 8 // 128 Byte vector access instruction (vmem). }; // MCInstrDesc TSFlags @@ -156,7 +179,7 @@ namespace HexagonII { AddrModeMask = 0x7, // Access size for load/store instructions. MemAccessSizePos = 43, - MemAccesSizeMask = 0x7, + MemAccesSizeMask = 0xf, // Branch predicted taken. TakenPos = 47, @@ -164,7 +187,23 @@ namespace HexagonII { // Floating-point instructions. FPPos = 48, - FPMask = 0x1 + FPMask = 0x1, + + // New-Value producer-2 instructions. + hasNewValuePos2 = 50, + hasNewValueMask2 = 0x1, + + // Which operand consumes or produces a new value. + NewValueOpPos2 = 51, + NewValueOpMask2 = 0x7, + + // Accumulator instructions. + AccumulatorPos = 54, + AccumulatorMask = 0x1, + + // Complex XU, prevent xu competition by prefering slot3 + PrefersSlot3Pos = 55, + PrefersSlot3Mask = 0x1, }; // *** The code above must match HexagonInstrFormat*.td *** // @@ -219,6 +258,26 @@ namespace HexagonII { INST_PARSE_EXTENDER = 0x00000000 }; + enum InstIClassBits : unsigned { + INST_ICLASS_MASK = 0xf0000000, + INST_ICLASS_EXTENDER = 0x00000000, + INST_ICLASS_J_1 = 0x10000000, + INST_ICLASS_J_2 = 0x20000000, + INST_ICLASS_LD_ST_1 = 0x30000000, + INST_ICLASS_LD_ST_2 = 0x40000000, + INST_ICLASS_J_3 = 0x50000000, + INST_ICLASS_CR = 0x60000000, + INST_ICLASS_ALU32_1 = 0x70000000, + INST_ICLASS_XTYPE_1 = 0x80000000, + INST_ICLASS_LD = 0x90000000, + INST_ICLASS_ST = 0xa0000000, + INST_ICLASS_ALU32_2 = 0xb0000000, + INST_ICLASS_XTYPE_2 = 0xc0000000, + INST_ICLASS_XTYPE_3 = 0xd0000000, + INST_ICLASS_XTYPE_4 = 0xe0000000, + INST_ICLASS_ALU32_3 = 0xf0000000 + }; + } // End namespace HexagonII. } // End namespace llvm. diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index 36f81465eef6..06ccec532211 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "HexagonAsmPrinter.h" -#include "Hexagon.h" #include "HexagonInstPrinter.h" +#include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -28,104 +28,33 @@ using namespace llvm; #define GET_INSTRUCTION_NAME #include "HexagonGenAsmWriter.inc" -HexagonAsmInstPrinter::HexagonAsmInstPrinter(MCInstPrinter *RawPrinter) - : MCInstPrinter(*RawPrinter), RawPrinter(RawPrinter) {} - -void HexagonAsmInstPrinter::printInst(MCInst const *MI, raw_ostream &O, - StringRef Annot, - MCSubtargetInfo const &STI) { - assert(HexagonMCInstrInfo::isBundle(*MI)); - assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE); - std::string Buffer; - { - raw_string_ostream TempStream(Buffer); - RawPrinter->printInst(MI, TempStream, "", STI); - } - StringRef Contents(Buffer); - auto PacketBundle = Contents.rsplit('\n'); - auto HeadTail = PacketBundle.first.split('\n'); - auto Preamble = "\t{\n\t\t"; - auto Separator = ""; - while(!HeadTail.first.empty()) { - O << Separator; - StringRef Inst; - auto Duplex = HeadTail.first.split('\v'); - if(!Duplex.second.empty()){ - O << Duplex.first << "\n"; - Inst = Duplex.second; - } - else - Inst = Duplex.first; - 
O << Preamble; - O << Inst; - HeadTail = HeadTail.second.split('\n'); - Preamble = ""; - Separator = "\n\t\t"; - } - O << "\n\t}" << PacketBundle.second; -} - -void HexagonAsmInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { - RawPrinter->printRegName(O, RegNo); -} - -// Return the minimum value that a constant extendable operand can have -// without being extended. -static int getMinValue(uint64_t TSFlags) { - unsigned isSigned = - (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; - unsigned bits = - (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; - - if (isSigned) - return -1U << (bits - 1); - - return 0; -} - -// Return the maximum value that a constant extendable operand can have -// without being extended. -static int getMaxValue(uint64_t TSFlags) { - unsigned isSigned = - (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; - unsigned bits = - (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; - - if (isSigned) - return ~(-1U << (bits - 1)); - - return ~(-1U << bits); -} - -// Return true if the instruction must be extended. -static bool isExtended(uint64_t TSFlags) { - return (TSFlags >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; -} - -// Currently just used in an assert statement -static bool isExtendable(uint64_t TSFlags) LLVM_ATTRIBUTE_UNUSED; -// Return true if the instruction may be extended based on the operand value. -static bool isExtendable(uint64_t TSFlags) { - return (TSFlags >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; +HexagonInstPrinter::HexagonInstPrinter(MCAsmInfo const &MAI, + MCInstrInfo const &MII, + MCRegisterInfo const &MRI) + : MCInstPrinter(MAI, MII, MRI), MII(MII), HasExtender(false) { } StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const { return MII.getName(Opcode); } -void HexagonInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << getRegisterName(RegNo); +void HexagonInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { + O << getRegName(RegNo); +} + +StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { + return getRegisterName(RegNo); } void HexagonInstPrinter::setExtender(MCInst const &MCI) { HasExtender = HexagonMCInstrInfo::isImmext(MCI); } -void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS, - StringRef Annot, - MCSubtargetInfo const &STI) { +void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot, const MCSubtargetInfo &STI) { assert(HexagonMCInstrInfo::isBundle(*MI)); assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE); + assert(HexagonMCInstrInfo::bundleSize(*MI) > 0); HasExtender = false; for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) { MCInst const &MCI = *I.getInst(); @@ -157,145 +86,148 @@ void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS, } } -void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { - const MCOperand& MO = MI->getOperand(OpNo); - + if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo && + (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI))) + O << "#"; + MCOperand const &MO = MI->getOperand(OpNo); if (MO.isReg()) { - printRegName(O, MO.getReg()); - } else if(MO.isExpr()) { - MO.getExpr()->print(O, &MAI); - } else if(MO.isImm()) { - printImmOperand(MI, OpNo, O); - } else { - llvm_unreachable("Unknown operand"); - } -} - -void 
HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand& MO = MI->getOperand(OpNo); - - if(MO.isExpr()) { - MO.getExpr()->print(O, &MAI); - } else if(MO.isImm()) { - O << MI->getOperand(OpNo).getImm(); + O << getRegisterName(MO.getReg()); + } else if (MO.isExpr()) { + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) + O << formatImm(Value); + else + O << *MO.getExpr(); } else { llvm_unreachable("Unknown operand"); } } -void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printExtOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { - const MCOperand &MO = MI->getOperand(OpNo); - const MCInstrDesc &MII = getMII().get(MI->getOpcode()); - - assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) && - "Expecting an extendable operand"); - - if (MO.isExpr() || isExtended(MII.TSFlags)) { - O << "#"; - } else if (MO.isImm()) { - int ImmValue = MO.getImm(); - if (ImmValue < getMinValue(MII.TSFlags) || - ImmValue > getMaxValue(MII.TSFlags)) - O << "#"; - } printOperand(MI, OpNo, O); } -void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI, - unsigned OpNo, raw_ostream &O) const { +void HexagonInstPrinter::printUnsignedImmOperand(MCInst const *MI, + unsigned OpNo, + raw_ostream &O) const { O << MI->getOperand(OpNo).getImm(); } -void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printNegImmOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { O << -MI->getOperand(OpNo).getImm(); } -void HexagonInstPrinter::printNOneImmOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printNOneImmOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { O << -1; } -void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand& MO0 = MI->getOperand(OpNo); - const MCOperand& MO1 = MI->getOperand(OpNo + 1); +void HexagonInstPrinter::prints3_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<9>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO."); + O << formatImm(Imm/64); +} - printRegName(O, MO0.getReg()); - O << " + #" << MO1.getImm(); +void HexagonInstPrinter::prints3_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<10>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO."); + O << formatImm(Imm/128); } -void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand& MO0 = MI->getOperand(OpNo); - const MCOperand& MO1 = MI->getOperand(OpNo + 1); +void HexagonInstPrinter::prints4_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<10>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO."); + O << formatImm(Imm/64); +} - printRegName(O, MO0.getReg()); - O << ", #" << MO1.getImm(); +void HexagonInstPrinter::prints4_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = 
MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<11>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO."); + O << formatImm(Imm/128); } -void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { - assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); - printOperand(MI, OpNo, O); } -void HexagonInstPrinter::printJumpTable(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); printOperand(MI, OpNo, O); } -void HexagonInstPrinter::printConstantPool(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); printOperand(MI, OpNo, O); } -void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printBranchOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { // Branches can take an immediate operand. This is used by the branch // selection pass to print $+8, an eight byte displacement from the PC. llvm_unreachable("Unknown branch operand."); } -void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { -} +void HexagonInstPrinter::printCallOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} -void HexagonInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { -} +void HexagonInstPrinter::printAbsAddrOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} -void HexagonInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { -} +void HexagonInstPrinter::printPredicateOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} -void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O, bool hi) const { - assert(MI->getOperand(OpNo).isImm() && "Unknown symbol operand"); + MCOperand const &MO = MI->getOperand(OpNo); - O << '#' << (hi ? "HI" : "LO") << "(#"; - printOperand(MI, OpNo, O); + O << '#' << (hi ? 
"HI" : "LO") << '('; + if (MO.isImm()) { + O << '#'; + printOperand(MI, OpNo, O); + } else { + printOperand(MI, OpNo, O); + assert("Unknown symbol operand"); + } O << ')'; } -void HexagonInstPrinter::printExtBrtarget(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand &MO = MI->getOperand(OpNo); - const MCInstrDesc &MII = getMII().get(MI->getOpcode()); - - assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) && - "Expecting an extendable operand"); - - if (MO.isExpr() || isExtended(MII.TSFlags)) { - O << "##"; +void HexagonInstPrinter::printBrtarget(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + MCOperand const &MO = MI->getOperand(OpNo); + assert (MO.isExpr()); + MCExpr const &Expr = *MO.getExpr(); + int64_t Value; + if (Expr.evaluateAsAbsolute(Value)) + O << format("0x%" PRIx64, Value); + else { + if (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI)) + if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo) + O << "##"; + O << Expr; } - printOperand(MI, OpNo, O); } diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h index 534ac237d635..5f421184b20a 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// // -// This class prints an Hexagon MCInst to a .s file. // //===----------------------------------------------------------------------===// @@ -15,17 +14,8 @@ #define LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCInstrInfo.h" namespace llvm { -class HexagonAsmInstPrinter : public MCInstPrinter { -public: - HexagonAsmInstPrinter(MCInstPrinter *RawPrinter); - void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, - MCSubtargetInfo const &STI) override; - void printRegName(raw_ostream &O, unsigned RegNo) const override; - std::unique_ptr<MCInstPrinter> RawPrinter; -}; /// Prints bundles as a newline separated list of individual instructions /// Duplexes are separated by a vertical tab \v character /// A trailing line includes bundle properties such as endloop0/1 @@ -33,68 +23,69 @@ public: /// r0 = add(r1, r2) /// r0 = #0 \v jump 0x0 /// :endloop0 :endloop1 - class HexagonInstPrinter : public MCInstPrinter { - public: - explicit HexagonInstPrinter(MCAsmInfo const &MAI, - MCInstrInfo const &MII, - MCRegisterInfo const &MRI) - : MCInstPrinter(MAI, MII, MRI), MII(MII) {} - - void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, - const MCSubtargetInfo &STI) override; - virtual StringRef getOpcodeName(unsigned Opcode) const; - void printInstruction(const MCInst *MI, raw_ostream &O); - void printRegName(raw_ostream &OS, unsigned RegNo) const override; - static const char *getRegisterName(unsigned RegNo); +class HexagonInstPrinter : public MCInstPrinter { +public: + explicit HexagonInstPrinter(MCAsmInfo const &MAI, MCInstrInfo const &MII, + MCRegisterInfo const &MRI); + void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + virtual StringRef getOpcodeName(unsigned Opcode) const; + void printInstruction(MCInst const *MI, raw_ostream &O); - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void 
printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const; - void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printFrameIndexOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const; - void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printExtBrtarget(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + StringRef getRegName(unsigned RegNo) const; + static char const *getRegisterName(unsigned RegNo); + void printRegName(raw_ostream &O, unsigned RegNo) const override; - void printConstantPool(const MCInst *MI, unsigned OpNo, + void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printExtOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printUnsignedImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printNegImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printNOneImmOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void prints3_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints3_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints4_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints4_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printBranchOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printAbsAddrOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printPredicateOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printGlobalOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + + void printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; - void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O) const - { printSymbol(MI, OpNo, O, true); } - void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const - { printSymbol(MI, OpNo, O, false); } + void printSymbolHi(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { + printSymbol(MI, OpNo, O, true); + } + void printSymbolLo(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { + printSymbol(MI, OpNo, O, false); + } - const MCInstrInfo &getMII() const { - return MII; - } + MCAsmInfo const &getMAI() const { return MAI; } + MCInstrInfo const &getMII() const { return MII; } - protected: - void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi) - const; +protected: + void printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O, + bool hi) const; - private: - const 
MCInstrInfo &MII; +private: + MCInstrInfo const &MII; - bool HasExtender; - void setExtender(MCInst const &MCI); - }; + bool HasExtender; + void setExtender(MCInst const &MCI); +}; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h index dc0706994786..a8456b4ead9c 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h @@ -18,13 +18,14 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class Triple; +class Triple; - class HexagonMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit HexagonMCAsmInfo(const Triple &TT); - }; +class HexagonMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit HexagonMCAsmInfo(const Triple &TT); +}; } // namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp new file mode 100644 index 000000000000..46b7b41fec3b --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -0,0 +1,581 @@ +//===----- HexagonMCChecker.cpp - Instruction bundle checking -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the checking of insns inside a bundle according to the +// packet constraint rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCChecker.h" + +#include "HexagonBaseInfo.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> RelaxNVChecks("relax-nv-checks", cl::init(false), + cl::ZeroOrMore, cl::Hidden, cl::desc("Relax checks of new-value validity")); + +const HexagonMCChecker::PredSense + HexagonMCChecker::Unconditional(Hexagon::NoRegister, false); + +void HexagonMCChecker::init() { + // Initialize read-only registers set. + ReadOnly.insert(Hexagon::PC); + + // Figure out the loop-registers definitions. + if (HexagonMCInstrInfo::isInnerLoop(MCB)) { + Defs[Hexagon::SA0].insert(Unconditional); // FIXME: define or change SA0? + Defs[Hexagon::LC0].insert(Unconditional); + } + if (HexagonMCInstrInfo::isOuterLoop(MCB)) { + Defs[Hexagon::SA1].insert(Unconditional); // FIXME: define or change SA0? + Defs[Hexagon::LC1].insert(Unconditional); + } + + if (HexagonMCInstrInfo::isBundle(MCB)) + // Unfurl a bundle. + for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + init(*I.getInst()); + } + else + init(MCB); +} + +void HexagonMCChecker::init(MCInst const& MCI) { + const MCInstrDesc& MCID = HexagonMCInstrInfo::getDesc(MCII, MCI); + unsigned PredReg = Hexagon::NoRegister; + bool isTrue = false; + + // Get used registers. + for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i) + if (MCI.getOperand(i).isReg()) { + unsigned R = MCI.getOperand(i).getReg(); + + if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) { + // Note an used predicate register. 
+ PredReg = R; + isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI); + + // Note use of new predicate register. + if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + NewPreds.insert(PredReg); + } + else + // Note register use. Super-registers are not tracked directly, + // but their components. + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + // Skip super-registers used indirectly. + Uses.insert(*SRI); + } + + // Get implicit register definitions. + if (const MCPhysReg *ImpDef = MCID.getImplicitDefs()) + for (; *ImpDef; ++ImpDef) { + unsigned R = *ImpDef; + + if (Hexagon::R31 != R && MCID.isCall()) + // Any register other than the LR and the PC is actually a volatile one + // as defined by the ABI, not modified implicitly by the call insn. + continue; + if (Hexagon::PC == R) + // Branches are the only insns that can change the PC, + // otherwise a read-only register. + continue; + + if (Hexagon::USR_OVF == R) + // Many insns change the USR implicitly, but only one or another flag. + // The instruction table models the USR.OVF flag, which can be implicitly + // modified more than once, but cannot be modified in the same packet + // with an instruction that modifies it explicitly. Deal with such + // situations individually. + SoftDefs.insert(R); + else if (isPredicateRegister(R) && + HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) + // Include implicit late predicates. + LatePreds.insert(R); + else + Defs[R].insert(PredSense(PredReg, isTrue)); + } + + // Figure out explicit register definitions. + for (unsigned i = 0; i < MCID.getNumDefs(); ++i) { + unsigned R = MCI.getOperand(i).getReg(), + S = Hexagon::NoRegister; + + // Note register definitions, direct ones as well as indirect side-effects. + // Super-registers are not tracked directly, but their components. + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) { + if (MCSubRegIterator(*SRI, &RI).isValid()) + // Skip super-registers defined indirectly. + continue; + + if (R == *SRI) { + if (S == R) + // Avoid scoring the defined register multiple times. + continue; + else + // Note that the defined register has already been scored. + S = R; + } + + if (Hexagon::P3_0 != R && Hexagon::P3_0 == *SRI) + // P3:0 is a special case, since multiple predicate register definitions + // in a packet are allowed as the equivalent of their logical "and". + // Only an explicit definition of P3:0 is noted as such; if a + // side-effect, then note as a soft definition. + SoftDefs.insert(*SRI); + else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && isPredicateRegister(*SRI)) + // Some insns produce predicates too late to be used in the same packet. + LatePreds.insert(*SRI); + else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_CUR_LD) + // Current loads should be used in the same packet. + // TODO: relies on the impossibility of a current and a temporary load + // in the same packet. + CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue)); + else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_TMP_LD) + // Temporary loads should be used in the same packet, but don't commit + // results, so such a definition should be disregarded if another insn + // changes the same register. + // TODO: relies on the impossibility of a current and a temporary load + // in the same packet.
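+          // For instance, "{ v0.tmp = vmem(r0+#0); v1 = vadd(v0,v0) }" must + // consume v0 within the packet, and since a .tmp load never commits to + // the register file, another packet member redefining v0 is not a + // conflict here.  (Illustrative HVX packet.)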
+ TmpDefs.insert(*SRI); + else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI) ) + // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and + // destination registers with this instruction. same for vdeal(Vx,Vy,Rx) + Uses.insert(*SRI); + else + Defs[*SRI].insert(PredSense(PredReg, isTrue)); + } + } + + // Figure out register definitions that produce new values. + if (HexagonMCInstrInfo::hasNewValue(MCII, MCI)) { + unsigned R = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg(); + + if (HexagonMCInstrInfo::isCompound(MCII, MCI)) + compoundRegisterMap(R); // Compound insns have a limited register range. + + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + // No super-registers defined indirectly. + NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); + + // For fairly unique 2-dot-new producers, example: + // vdeal(V1, V9, R0) V1.new and V9.new can be used by consumers. + if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) { + unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg(); + + for(MCRegAliasIterator SRI(R2, &RI, !MCSubRegIterator(R2, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); + } + } + + // Figure out definitions of new predicate registers. + if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i) + if (MCI.getOperand(i).isReg()) { + unsigned P = MCI.getOperand(i).getReg(); + + if (isPredicateRegister(P)) + NewPreds.insert(P); + } + + // Figure out uses of new values. + if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) { + unsigned N = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg(); + + if (!MCSubRegIterator(N, &RI).isValid()) { + // Super-registers cannot use new values. + if (MCID.isBranch()) + NewUses[N] = NewSense::Jmp(llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV); + else + NewUses[N] = NewSense::Use(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI)); + } + } +} + +HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCInst &mcbdx, + MCRegisterInfo const &ri) + : MCB(mcb), MCBDX(mcbdx), RI(ri), MCII(MCII), STI(STI), + bLoadErrInfo(false) { + init(); +} + +bool HexagonMCChecker::check() { + bool chkB = checkBranches(); + bool chkP = checkPredicates(); + bool chkNV = checkNewValues(); + bool chkR = checkRegisters(); + bool chkS = checkSolo(); + bool chkSh = checkShuffle(); + bool chkSl = checkSlots(); + bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl; + + return chk; +} + +bool HexagonMCChecker::checkSlots() + +{ + unsigned slotsUsed = 0; + for (auto HMI: HexagonMCInstrInfo::bundleInstructions(MCBDX)) { + MCInst const& MCI = *HMI.getInst(); + if (HexagonMCInstrInfo::isImmext(MCI)) + continue; + if (HexagonMCInstrInfo::isDuplex(MCII, MCI)) + slotsUsed += 2; + else + ++slotsUsed; + } + + if (slotsUsed > HEXAGON_PACKET_SIZE) { + HexagonMCErrInfo errInfo; + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NOSLOTS); + addErrInfo(errInfo); + return false; + } + return true; +} + +// Check legal use of branches. 
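+// For example, "{ if (p0) jump .L1; jump .L2 }" is a legal dual-jump, while +// "{ jump .L2; if (p0) jump .L1 }" (conditional after the unconditional) and +// any branch inside an endloop packet are rejected below.  (Illustrative +// packets with hypothetical labels.)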
+bool HexagonMCChecker::checkBranches() { + HexagonMCErrInfo errInfo; + if (HexagonMCInstrInfo::isBundle(MCB)) { + bool hasConditional = false; + unsigned Branches = 0, Returns = 0, NewIndirectBranches = 0, + NewValueBranches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE, + Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE; + + for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset; + i < MCB.size(); ++i) { + MCInst const &MCI = *MCB.begin()[i].getInst(); + + if (HexagonMCInstrInfo::isImmext(MCI)) + continue; + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch() || + HexagonMCInstrInfo::getDesc(MCII, MCI).isCall()) { + ++Branches; + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isIndirectBranch() && + HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + ++NewIndirectBranches; + if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) + ++NewValueBranches; + + if (HexagonMCInstrInfo::isPredicated(MCII, MCI) || + HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) { + hasConditional = true; + Conditional = i; // Record the position of the conditional branch. + } else { + Unconditional = i; // Record the position of the unconditional branch. + } + } + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isReturn() && + HexagonMCInstrInfo::getDesc(MCII, MCI).mayLoad()) + ++Returns; + } + + if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too? + if (HexagonMCInstrInfo::isInnerLoop(MCB) || + HexagonMCInstrInfo::isOuterLoop(MCB)) { + // Error out if there's any branch in a loop-end packet. + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_ENDLOOP, Hexagon::PC); + addErrInfo(errInfo); + return false; + } + if (Branches > 1) + if (!hasConditional || Conditional > Unconditional) { + // Error out if more than one unconditional branch or + // the conditional branch appears after the unconditional one. + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_BRANCHES); + addErrInfo(errInfo); + return false; + } + } + + return true; +} + +// Check legal use of predicate registers. +bool HexagonMCChecker::checkPredicates() { + HexagonMCErrInfo errInfo; + // Check for proper use of new predicate registers. + for (const auto& I : NewPreds) { + unsigned P = I; + + if (!Defs.count(P) || LatePreds.count(P)) { + // Error out if the new predicate register is not defined, + // or defined "late" + // (e.g., "{ if (p3.new)... ; p3 = sp1loop0(#r7:2, Rs) }"). + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWP, P); + addErrInfo(errInfo); + return false; + } + } + + // Check for proper use of auto-anded predicate registers. + for (const auto& I : LatePreds) { + unsigned P = I; + + if (LatePreds.count(P) > 1 || Defs.count(P)) { + // Error out if a predicate register is defined "late" multiple times or + // defined late and regularly defined + // (e.g., "{ p3 = sp1loop0(...); p3 = cmp.eq(...) }"). + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, P); + addErrInfo(errInfo); + return false; + } + } + + return true; +} + +// Check legal use of new values. +bool HexagonMCChecker::checkNewValues() { + HexagonMCErrInfo errInfo; + memset(&errInfo, 0, sizeof(errInfo)); + for (auto& I : NewUses) { + unsigned R = I.first; + NewSense &US = I.second; + + if (!hasValidNewValueDef(US, NewDefs[R])) { + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWV, R); + addErrInfo(errInfo); + return false; + } + } + + return true; +} + +// Check for legal register uses and definitions. +bool HexagonMCChecker::checkRegisters() { + HexagonMCErrInfo errInfo; + // Check for proper register definitions.
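+  // For example, "{ r0 = add(r1,r2); r0 = sub(r4,r5) }" (two unconditional +  // writes of r0) and "{ if (p0) r0 = #0; if (p0) r0 = #1 }" (same sense of +  // p0 used twice) are rejected, while the complementary pair +  // "{ if (p0) r0 = #0; if (!p0) r0 = #1 }" passes.  (Illustrative packets.)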
+ for (const auto& I : Defs) { + unsigned R = I.first; + + if (ReadOnly.count(R)) { + // Error out for definitions of read-only registers. + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_READONLY, R); + addErrInfo(errInfo); + return false; + } + if (isLoopRegister(R) && Defs.count(R) > 1 && + (HexagonMCInstrInfo::isInnerLoop(MCB) || + HexagonMCInstrInfo::isOuterLoop(MCB))) { + // Error out for definitions of loop registers at the end of a loop. + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_LOOP, R); + addErrInfo(errInfo); + return false; + } + if (SoftDefs.count(R)) { + // Error out for explicit changes to registers also weakly defined + // (e.g., "{ usr = r0; r0 = sfadd(...) }"). + unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:. + unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR); + addErrInfo(errInfo); + return false; + } + if (!isPredicateRegister(R) && Defs[R].size() > 1) { + // Check for multiple register definitions. + PredSet &PM = Defs[R]; + + // Check for multiple unconditional register definitions. + if (PM.count(Unconditional)) { + // Error out on an unconditional change when there are any other + // changes, conditional or not. + unsigned UsrR = Hexagon::USR; + unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR); + addErrInfo(errInfo); + return false; + } + // Check for multiple conditional register definitions. + for (const auto& J : PM) { + PredSense P = J; + + // Check for multiple uses of the same condition. + if (PM.count(P) > 1) { + // Error out on conditional changes based on the same predicate + // (e.g., "{ if (!p0) r0 =...; if (!p0) r0 =... }"). + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R); + addErrInfo(errInfo); + return false; + } + // Check for the use of the complementary condition. + P.second = !P.second; + if (PM.count(P) && PM.size() > 2) { + // Error out on conditional changes based on the same predicate + // multiple times + // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =... }"). + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R); + addErrInfo(errInfo); + return false; + } + } + } + } + + // Check for use of current definitions. + for (const auto& I : CurDefs) { + unsigned R = I; + + if (!Uses.count(R)) { + // Warn on an unused current definition. + errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_CURRENT, R); + addErrInfo(errInfo); + return true; + } + } + + // Check for use of temporary definitions. + for (const auto& I : TmpDefs) { + unsigned R = I; + + if (!Uses.count(R)) { + // special case for vhist + bool vHistFound = false; + for (auto const&HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) { + if(llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == HexagonII::TypeCVI_HIST) { + vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp + break; + } + } + // Warn on an unused temporary definition. + if (vHistFound == false) { + errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_TEMPORARY, R); + addErrInfo(errInfo); + return true; + } + } + } + + return true; +} + +// Check for legal use of solo insns. 
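+// A solo insn must be the only one in its packet: bundling it with any other +// insn raises CHECK_ERROR_SOLO.  (Which insns are solo comes from the +// instruction tables via HexagonMCInstrInfo::isSolo().)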
+bool HexagonMCChecker::checkSolo() { + HexagonMCErrInfo errInfo; + if (HexagonMCInstrInfo::isBundle(MCB) && + HexagonMCInstrInfo::bundleSize(MCB) > 1) { + for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) { + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SOLO); + addErrInfo(errInfo); + return false; + } + } + } + + return true; +} + +bool HexagonMCChecker::checkShuffle() { + HexagonMCErrInfo errInfo; + // Branch info is lost when duplexing. The unduplexed insns must be + // checked and only branch errors matter for this case. + HexagonMCShuffler MCS(MCII, STI, MCB); + if (!MCS.check()) { + if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) { + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE); + errInfo.setShuffleError(MCS.getError()); + addErrInfo(errInfo); + return false; + } + } + HexagonMCShuffler MCSDX(MCII, STI, MCBDX); + if (!MCSDX.check()) { + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE); + errInfo.setShuffleError(MCSDX.getError()); + addErrInfo(errInfo); + return false; + } + return true; +} + +void HexagonMCChecker::compoundRegisterMap(unsigned& Register) { + switch (Register) { + default: + break; + case Hexagon::R15: + Register = Hexagon::R23; + break; + case Hexagon::R14: + Register = Hexagon::R22; + break; + case Hexagon::R13: + Register = Hexagon::R21; + break; + case Hexagon::R12: + Register = Hexagon::R20; + break; + case Hexagon::R11: + Register = Hexagon::R19; + break; + case Hexagon::R10: + Register = Hexagon::R18; + break; + case Hexagon::R9: + Register = Hexagon::R17; + break; + case Hexagon::R8: + Register = Hexagon::R16; + break; + } +} + +bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use, + const NewSenseList &Defs) const { + bool Strict = !RelaxNVChecks; + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + const NewSense &Def = Defs[i]; + // NVJ cannot use a new FP value [7.6.1] + if (Use.IsNVJ && (Def.IsFloat || Def.PredReg != 0)) + continue; + // If the definition was not predicated, then it does not matter if + // the use is. + if (Def.PredReg == 0) + return true; + // With the strict checks, both the definition and the use must be + // predicated on the same register and condition. + if (Strict) { + if (Def.PredReg == Use.PredReg && Def.Cond == Use.Cond) + return true; + } else { + // With the relaxed checks, if the definition was predicated, the only + // detectable violation is if the use is predicated on the opposing + // condition, otherwise, it's ok. + if (Def.PredReg != Use.PredReg || Def.Cond == Use.Cond) + return true; + } + } + return false; +} + diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h new file mode 100644 index 000000000000..5fc0bdeaccbb --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -0,0 +1,218 @@ +//===----- HexagonMCChecker.h - Instruction bundle checking ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the checking of insns inside a bundle according to the +// packet constraint rules of the Hexagon ISA. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCCHECKER_H +#define HEXAGONMCCHECKER_H + +#include <map> +#include <set> +#include <queue> +#include "MCTargetDesc/HexagonMCShuffler.h" + +using namespace llvm; + +namespace llvm { +class MCOperandInfo; + +typedef struct { + unsigned Error, Warning, ShuffleError; + unsigned Register; +} ErrInfo_T; + +class HexagonMCErrInfo { +public: + enum { + CHECK_SUCCESS = 0, + // Errors. + CHECK_ERROR_BRANCHES = 0x00001, + CHECK_ERROR_NEWP = 0x00002, + CHECK_ERROR_NEWV = 0x00004, + CHECK_ERROR_REGISTERS = 0x00008, + CHECK_ERROR_READONLY = 0x00010, + CHECK_ERROR_LOOP = 0x00020, + CHECK_ERROR_ENDLOOP = 0x00040, + CHECK_ERROR_SOLO = 0x00080, + CHECK_ERROR_SHUFFLE = 0x00100, + CHECK_ERROR_NOSLOTS = 0x00200, + CHECK_ERROR_UNKNOWN = 0x00400, + // Warnings. + CHECK_WARN_CURRENT = 0x10000, + CHECK_WARN_TEMPORARY = 0x20000 + }; + ErrInfo_T s; + + void reset() { + s.Error = CHECK_SUCCESS; + s.Warning = CHECK_SUCCESS; + s.ShuffleError = HexagonShuffler::SHUFFLE_SUCCESS; + s.Register = Hexagon::NoRegister; + }; + HexagonMCErrInfo() { + reset(); + }; + + void setError(unsigned e, unsigned r = Hexagon::NoRegister) + { s.Error = e; s.Register = r; }; + void setWarning(unsigned w, unsigned r = Hexagon::NoRegister) + { s.Warning = w; s.Register = r; }; + void setShuffleError(unsigned e) { s.ShuffleError = e; }; +}; + +/// Check for a valid bundle. +class HexagonMCChecker { + /// Insn bundle. + MCInst& MCB; + MCInst& MCBDX; + const MCRegisterInfo& RI; + MCInstrInfo const &MCII; + MCSubtargetInfo const &STI; + bool bLoadErrInfo; + + /// Set of definitions: register #, if predicated, if predicated true. + typedef std::pair<unsigned, bool> PredSense; + static const PredSense Unconditional; + typedef std::multiset<PredSense> PredSet; + typedef std::multiset<PredSense>::iterator PredSetIterator; + + typedef llvm::DenseMap<unsigned, PredSet>::iterator DefsIterator; + llvm::DenseMap<unsigned, PredSet> Defs; + + /// Information about how a new-value register is defined or used: + /// PredReg = predicate register, 0 if use/def not predicated, + /// Cond = true/false for if(PredReg)/if(!PredReg) respectively, + /// IsFloat = true if definition produces a floating point value + /// (not valid for uses), + /// IsNVJ = true if the use is a new-value branch (not valid for + /// definitions). + struct NewSense { + unsigned PredReg; + bool IsFloat, IsNVJ, Cond; + // The special-case "constructors": + static NewSense Jmp(bool isNVJ) { + NewSense NS = { /*PredReg=*/ 0, /*IsFloat=*/ false, /*IsNVJ=*/ isNVJ, + /*Cond=*/ false }; + return NS; + } + static NewSense Use(unsigned PR, bool True) { + NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ false, /*IsNVJ=*/ false, + /*Cond=*/ True }; + return NS; + } + static NewSense Def(unsigned PR, bool True, bool Float) { + NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ Float, /*IsNVJ=*/ false, + /*Cond=*/ True }; + return NS; + } + }; + /// Set of definitions that produce new register: + typedef llvm::SmallVector<NewSense,2> NewSenseList; + typedef llvm::DenseMap<unsigned, NewSenseList>::iterator NewDefsIterator; + llvm::DenseMap<unsigned, NewSenseList> NewDefs; + + /// Set of weak definitions whose clashes should be enforced selectively. + typedef std::set<unsigned>::iterator SoftDefsIterator; + std::set<unsigned> SoftDefs; + + /// Set of current definitions committed to the register file. 
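+  /// (A .cur load both feeds consumers in its own packet and commits its +  /// result to the register file, unlike the .tmp loads tracked separately +  /// below.)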
+ typedef std::set<unsigned>::iterator CurDefsIterator; + std::set<unsigned> CurDefs; + + /// Set of temporary definitions not committed to the register file. + typedef std::set<unsigned>::iterator TmpDefsIterator; + std::set<unsigned> TmpDefs; + + /// Set of new predicates used. + typedef std::set<unsigned>::iterator NewPredsIterator; + std::set<unsigned> NewPreds; + + /// Set of predicates defined late. + typedef std::multiset<unsigned>::iterator LatePredsIterator; + std::multiset<unsigned> LatePreds; + + /// Set of uses. + typedef std::set<unsigned>::iterator UsesIterator; + std::set<unsigned> Uses; + + /// Set of new values used: new register, if new-value jump. + typedef llvm::DenseMap<unsigned, NewSense>::iterator NewUsesIterator; + llvm::DenseMap<unsigned, NewSense> NewUses; + + /// Pre-defined set of read-only registers. + typedef std::set<unsigned>::iterator ReadOnlyIterator; + std::set<unsigned> ReadOnly; + + std::queue<ErrInfo_T> ErrInfoQ; + HexagonMCErrInfo CrntErrInfo; + + void getErrInfo() { + if (bLoadErrInfo == true) { + if (ErrInfoQ.empty()) { + CrntErrInfo.reset(); + } else { + CrntErrInfo.s = ErrInfoQ.front(); + ErrInfoQ.pop(); + } + } + bLoadErrInfo = false; + } + + void init(); + void init(MCInst const&); + + // Checks performed. + bool checkBranches(); + bool checkPredicates(); + bool checkNewValues(); + bool checkRegisters(); + bool checkSolo(); + bool checkShuffle(); + bool checkSlots(); + + static void compoundRegisterMap(unsigned&); + + bool isPredicateRegister(unsigned R) const { + return (Hexagon::P0 == R || Hexagon::P1 == R || + Hexagon::P2 == R || Hexagon::P3 == R); + }; + bool isLoopRegister(unsigned R) const { + return (Hexagon::SA0 == R || Hexagon::LC0 == R || + Hexagon::SA1 == R || Hexagon::LC1 == R); + }; + + bool hasValidNewValueDef(const NewSense &Use, + const NewSenseList &Defs) const; + + public: + explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst& mcb, MCInst &mcbdx, + const MCRegisterInfo& ri); + + bool check(); + + /// add a new error/warning + void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); }; + + /// Return the error code for the last operation in the insn bundle. + unsigned getError() { getErrInfo(); return CrntErrInfo.s.Error; }; + unsigned getWarning() { getErrInfo(); return CrntErrInfo.s.Warning; }; + unsigned getShuffleError() { getErrInfo(); return CrntErrInfo.s.ShuffleError; }; + unsigned getErrRegister() { getErrInfo(); return CrntErrInfo.s.Register; }; + bool getNextErrInfo() { + bLoadErrInfo = true; + return (ErrInfoQ.empty()) ? false : (getErrInfo(), true); + } +}; + +} + +#endif // HEXAGONMCCHECKER_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 9fc4e2aeaba6..c2c6275e7e8d 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -96,6 +96,12 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction( assert(!HexagonMCInstrInfo::isBundle(HMB)); uint64_t Binary; + // Compound instructions are limited to using registers 0-7 and 16-23 + // and here we make a map 16-23 to 8-15 so they can be correctly encoded. + static unsigned RegMap[8] = {Hexagon::R8, Hexagon::R9, Hexagon::R10, + Hexagon::R11, Hexagon::R12, Hexagon::R13, + Hexagon::R14, Hexagon::R15}; + // Pseudo instructions don't get encoded and shouldn't be here // in the first place! 
assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() && @@ -104,6 +110,16 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction( " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'" "\n"); + if (llvm::HexagonMCInstrInfo::getType(MCII, HMB) == HexagonII::TypeCOMPOUND) { + for (unsigned i = 0; i < HMB.getNumOperands(); ++i) + if (HMB.getOperand(i).isReg()) { + unsigned Reg = + MCT.getRegisterInfo()->getEncodingValue(HMB.getOperand(i).getReg()); + if ((Reg <= 23) && (Reg >= 16)) + HMB.getOperand(i).setReg(RegMap[Reg - 16]); + } + } + if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) { // Calculate the new value distance to the associated producer MCOperand &MCO = @@ -318,7 +334,7 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, // The only relocs left should be GP relative: default: if (MCID.mayStore() || MCID.mayLoad()) { - for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) { if (*ImpUses == Hexagon::GP) { switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) { @@ -389,10 +405,8 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, return cast<MCConstantExpr>(ME)->getValue(); } if (MK == MCExpr::Binary) { - unsigned Res; - Res = getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI); - Res += - getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI); + getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI); + getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI); return 0; } diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp index 886f8db3bc63..d194bea3d8dc 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp @@ -115,8 +115,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { SrcReg = MI.getOperand(1).getReg(); if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MI.getOperand(2).isImm() && ((isUInt<5>(MI.getOperand(2).getImm())) || - (MI.getOperand(2).getImm() == -1))) + (HexagonMCInstrInfo::inRange<5>(MI, 2) || + HexagonMCInstrInfo::minConstant(MI, 2) == -1)) return HexagonII::HCG_A; break; case Hexagon::A2_tfr: @@ -134,8 +134,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { return false; // Rd = #u6 DstReg = MI.getOperand(0).getReg(); - if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() <= 63 && - MI.getOperand(1).getImm() >= 0 && + if (HexagonMCInstrInfo::minConstant(MI, 1) <= 63 && + HexagonMCInstrInfo::minConstant(MI, 1) >= 0 && HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) return HexagonII::HCG_A; break; @@ -145,9 +145,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { DstReg = MI.getOperand(0).getReg(); Src1Reg = MI.getOperand(1).getReg(); if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && - MI.getOperand(2).isImm() && HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && - (MI.getOperand(2).getImm() == 0)) + HexagonMCInstrInfo::minConstant(MI, 2) == 0) return HexagonII::HCG_A; break; // The fact that .new form is used pretty much guarantees @@ -206,6 +205,8 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { MCInst *CompoundInsn = 0; unsigned compoundOpcode; MCOperand Rs, Rt; + int64_t Value; + bool Success; switch 
(L.getOpcode()) { default: @@ -277,7 +278,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { case Hexagon::C2_cmpeqi: DEBUG(dbgs() << "CX: C2_cmpeqi\n"); - if (L.getOperand(2).getImm() == -1) + Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + (void)Success; + assert(Success); + if (Value == -1) compoundOpcode = cmpeqn1BitOpcode[getCompoundOp(R)]; else compoundOpcode = cmpeqiBitOpcode[getCompoundOp(R)]; @@ -286,14 +290,17 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { CompoundInsn = new (Context) MCInst; CompoundInsn->setOpcode(compoundOpcode); CompoundInsn->addOperand(Rs); - if (L.getOperand(2).getImm() != -1) + if (Value != -1) CompoundInsn->addOperand(L.getOperand(2)); CompoundInsn->addOperand(R.getOperand(1)); break; case Hexagon::C2_cmpgti: DEBUG(dbgs() << "CX: C2_cmpgti\n"); - if (L.getOperand(2).getImm() == -1) + Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + (void)Success; + assert(Success); + if (Value == -1) compoundOpcode = cmpgtn1BitOpcode[getCompoundOp(R)]; else compoundOpcode = cmpgtiBitOpcode[getCompoundOp(R)]; @@ -302,7 +309,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { CompoundInsn = new (Context) MCInst; CompoundInsn->setOpcode(compoundOpcode); CompoundInsn->addOperand(Rs); - if (L.getOperand(2).getImm() != -1) + if (Value != -1) CompoundInsn->addOperand(L.getOperand(2)); CompoundInsn->addOperand(R.getOperand(1)); break; @@ -404,7 +411,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { /// additional slot. void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { - assert(MCI.getOpcode() == Hexagon::BUNDLE && + assert(HexagonMCInstrInfo::isBundle(MCI) && "Non-Bundle where Bundle expected"); // By definition a compound must have 2 insn. diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp index 7e9247cef6ad..e6194f61a6ba 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -26,7 +26,7 @@ using namespace Hexagon; #define DEBUG_TYPE "hexagon-mcduplex-info" // pair table of subInstructions with opcodes -static std::pair<unsigned, unsigned> opcodeData[] = { +static const std::pair<unsigned, unsigned> opcodeData[] = { std::make_pair((unsigned)V4_SA1_addi, 0), std::make_pair((unsigned)V4_SA1_addrx, 6144), std::make_pair((unsigned)V4_SA1_addsp, 3072), @@ -81,8 +81,7 @@ static std::pair<unsigned, unsigned> opcodeData[] = { std::make_pair((unsigned)V4_SS2_storewi1, 4352)}; static std::map<unsigned, unsigned> - subinstOpcodeMap(opcodeData, - opcodeData + sizeof(opcodeData) / sizeof(opcodeData[0])); + subinstOpcodeMap(std::begin(opcodeData), std::end(opcodeData)); bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) { switch (Ga) { @@ -195,15 +194,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // Special case this one from Group L2. 
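+ // (The r29-based stack form gets the wider 5-bit scaled offset of group L2; + // the generic Rs-based form below must fit the narrower 4-bit L1 encoding.)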
// Rd = memw(r29+#u5:2) if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { - if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && - MCI.getOperand(2).isImm() && - isShiftedUInt<5, 2>(MCI.getOperand(2).getImm())) { + if (HexagonMCInstrInfo::isIntReg(SrcReg) && + Hexagon::R29 == SrcReg && inRange<5, 2>(MCI, 2)) { return HexagonII::HSIG_L2; } // Rd = memw(Rs+#u4:2) if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - (MCI.getOperand(2).isImm() && - isShiftedUInt<4, 2>(MCI.getOperand(2).getImm()))) { + inRange<4, 2>(MCI, 2)) { return HexagonII::HSIG_L1; } } @@ -214,7 +211,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && isUInt<4>(MCI.getOperand(2).getImm())) { + inRange<4>(MCI, 2)) { return HexagonII::HSIG_L1; } break; @@ -235,8 +232,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && - isShiftedUInt<3, 1>(MCI.getOperand(2).getImm())) { + inRange<3, 1>(MCI, 2)) { return HexagonII::HSIG_L2; } break; @@ -246,7 +242,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && isUInt<3>(MCI.getOperand(2).getImm())) { + inRange<3>(MCI, 2)) { return HexagonII::HSIG_L2; } break; @@ -256,8 +252,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && - MCI.getOperand(2).isImm() && - isShiftedUInt<5, 3>(MCI.getOperand(2).getImm())) { + inRange<5, 3>(MCI, 2)) { return HexagonII::HSIG_L2; } break; @@ -326,15 +321,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { Src2Reg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isIntReg(Src1Reg) && HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && - Hexagon::R29 == Src1Reg && MCI.getOperand(1).isImm() && - isShiftedUInt<5, 2>(MCI.getOperand(1).getImm())) { + Hexagon::R29 == Src1Reg && inRange<5, 2>(MCI, 1)) { return HexagonII::HSIG_S2; } // memw(Rs+#u4:2) = Rt if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && - MCI.getOperand(1).isImm() && - isShiftedUInt<4, 2>(MCI.getOperand(1).getImm())) { + inRange<4, 2>(MCI, 1)) { return HexagonII::HSIG_S1; } break; @@ -344,7 +337,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { Src2Reg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && - MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm())) { + inRange<4>(MCI, 1)) { return HexagonII::HSIG_S1; } break; @@ -363,8 +356,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { Src2Reg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && - MCI.getOperand(1).isImm() && - isShiftedUInt<3, 1>(MCI.getOperand(1).getImm())) { + inRange<3, 1>(MCI, 1)) { return HexagonII::HSIG_S2; } break; @@ 
-374,8 +366,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { Src2Reg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(Src2Reg) && HexagonMCInstrInfo::isIntReg(Src1Reg) && Hexagon::R29 == Src1Reg && - MCI.getOperand(1).isImm() && - isShiftedInt<6, 3>(MCI.getOperand(1).getImm())) { + inSRange<6, 3>(MCI, 1)) { return HexagonII::HSIG_S2; } break; @@ -383,9 +374,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // memw(Rs+#u4:2) = #U1 Src1Reg = MCI.getOperand(0).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && - MCI.getOperand(1).isImm() && - isShiftedUInt<4, 2>(MCI.getOperand(1).getImm()) && - MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) { + inRange<4, 2>(MCI, 1) && inRange<1>(MCI, 2)) { return HexagonII::HSIG_S2; } break; @@ -393,16 +382,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // memb(Rs+#u4) = #U1 Src1Reg = MCI.getOperand(0).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && - MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm()) && - MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) { + inRange<4>(MCI, 1) && inRange<1>(MCI, 2)) { return HexagonII::HSIG_S2; } break; case Hexagon::S2_allocframe: - if (MCI.getOperand(0).isImm() && - isShiftedUInt<5, 3>(MCI.getOperand(0).getImm())) { + if (inRange<5, 3>(MCI, 0)) return HexagonII::HSIG_S2; - } break; // // Group A: @@ -428,8 +414,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { // Rd = add(r29,#u6:2) if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && - MCI.getOperand(2).isImm() && - isShiftedUInt<6, 2>(MCI.getOperand(2).getImm())) { + inRange<6, 2>(MCI, 2)) { return HexagonII::HSIG_A; } // Rx = add(Rx,#s7) @@ -439,8 +424,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // Rd = add(Rs,#1) // Rd = add(Rs,#-1) if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) || - (MCI.getOperand(2).getImm() == -1))) { + (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == -1)) { return HexagonII::HSIG_A; } } @@ -460,8 +444,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) || - (MCI.getOperand(2).getImm() == 255))) { + (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == 255)) { return HexagonII::HSIG_A; } break; @@ -491,8 +474,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { DstReg = MCI.getOperand(0).getReg(); // Rd PredReg = MCI.getOperand(1).getReg(); // P0 if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && - Hexagon::P0 == PredReg && MCI.getOperand(2).isImm() && - MCI.getOperand(2).getImm() == 0) { + Hexagon::P0 == PredReg && minConstant(MCI, 2) == 0) { return HexagonII::HSIG_A; } break; @@ -502,7 +484,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (Hexagon::P0 == DstReg && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && isUInt<2>(MCI.getOperand(2).getImm())) { + inRange<2>(MCI, 2)) { return HexagonII::HSIG_A; } break; @@ -511,10 +493,7 @@ unsigned 
HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // Rdd = combine(#u2,#U2) DstReg = MCI.getOperand(0).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && - // TODO: Handle Globals/Symbols - (MCI.getOperand(1).isImm() && isUInt<2>(MCI.getOperand(1).getImm())) && - ((MCI.getOperand(2).isImm() && - isUInt<2>(MCI.getOperand(2).getImm())))) { + inRange<2>(MCI, 1) && inRange<2>(MCI, 2)) { return HexagonII::HSIG_A; } break; @@ -524,7 +503,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - (MCI.getOperand(2).isImm() && MCI.getOperand(2).getImm() == 0)) { + minConstant(MCI, 2) == 0) { return HexagonII::HSIG_A; } break; @@ -534,7 +513,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - (MCI.getOperand(1).isImm() && MCI.getOperand(1).getImm() == 0)) { + minConstant(MCI, 1) == 0) { return HexagonII::HSIG_A; } break; @@ -556,19 +535,17 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { } bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) { - unsigned DstReg, SrcReg; - switch (potentialDuplex.getOpcode()) { case Hexagon::A2_addi: // testing for case of: Rx = add(Rx,#s7) DstReg = potentialDuplex.getOperand(0).getReg(); SrcReg = potentialDuplex.getOperand(1).getReg(); if (DstReg == SrcReg && HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { - if (potentialDuplex.getOperand(2).isExpr()) + int64_t Value; + if (!potentialDuplex.getOperand(2).getExpr()->evaluateAsAbsolute(Value)) return true; - if (potentialDuplex.getOperand(2).isImm() && - !(isShiftedInt<7, 0>(potentialDuplex.getOperand(2).getImm()))) + if (!isShiftedInt<7, 0>(Value)) return true; } break; @@ -576,15 +553,14 @@ bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) { DstReg = potentialDuplex.getOperand(0).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { - if (potentialDuplex.getOperand(1).isExpr()) + int64_t Value; + if (!potentialDuplex.getOperand(1).getExpr()->evaluateAsAbsolute(Value)) return true; // Check for case of Rd = #-1. - if (potentialDuplex.getOperand(1).isImm() && - (potentialDuplex.getOperand(1).getImm() == -1)) + if (Value == -1) return false; // Check for case of Rd = #u6. 
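+ // (A2_tfrsi fits a sub-instruction only for #-1 or a 6-bit unsigned value; + // anything else would drag in a constant extender and defeat the size win + // of duplexing.)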
- if (potentialDuplex.getOperand(1).isImm() && - !isShiftedUInt<6, 0>(potentialDuplex.getOperand(1).getImm())) + if (!isShiftedUInt<6, 0>(Value)) return true; } break; @@ -712,19 +688,23 @@ inline static void addOps(MCInst &subInstPtr, MCInst const &Inst, MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { MCInst Result; + bool Absolute; + int64_t Value; switch (Inst.getOpcode()) { default: // dbgs() << "opcode: "<< Inst->getOpcode() << "\n"; llvm_unreachable("Unimplemented subinstruction \n"); break; case Hexagon::A2_addi: - if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == 1) { + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 1) { Result.setOpcode(Hexagon::V4_SA1_inc); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; } // 1,2 SUBInst $Rd = add($Rs, #1) - else if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == -1) { + else if (Value == -1) { Result.setOpcode(Hexagon::V4_SA1_dec); addOps(Result, Inst, 0); addOps(Result, Inst, 1); @@ -754,7 +734,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { addOps(Result, Inst, 0); break; // 1 SUBInst allocframe(#$u5_3) case Hexagon::A2_andir: - if (Inst.getOperand(2).getImm() == 255) { + if (minConstant(Inst, 2) == 255) { Result.setOpcode(Hexagon::V4_SA1_zxtb); addOps(Result, Inst, 0); addOps(Result, Inst, 1); @@ -772,26 +752,27 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2) case Hexagon::A4_combineii: case Hexagon::A2_combineii: - if (Inst.getOperand(1).getImm() == 1) { + Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 1) { Result.setOpcode(Hexagon::V4_SA1_combine1i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#1, #$u2) } - - if (Inst.getOperand(1).getImm() == 3) { + if (Value == 3) { Result.setOpcode(Hexagon::V4_SA1_combine3i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#3, #$u2) } - if (Inst.getOperand(1).getImm() == 0) { + if (Value == 0) { Result.setOpcode(Hexagon::V4_SA1_combine0i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#0, #$u2) } - if (Inst.getOperand(1).getImm() == 2) { + if (Value == 2) { Result.setOpcode(Hexagon::V4_SA1_combine2i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); @@ -894,12 +875,14 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { break; // 1,2,3 SUBInst $Rd = memw($Rs + #$u4_2) } case Hexagon::S4_storeirb_io: - if (Inst.getOperand(2).getImm() == 0) { + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 0) { Result.setOpcode(Hexagon::V4_SS2_storebi0); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0 - } else if (Inst.getOperand(2).getImm() == 1) { + } else if (Value == 1) { Result.setOpcode(Hexagon::V4_SS2_storebi1); addOps(Result, Inst, 0); addOps(Result, Inst, 1); @@ -923,12 +906,14 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { addOps(Result, Inst, 2); break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt case Hexagon::S4_storeiri_io: - if (Inst.getOperand(2).getImm() == 0) { + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 0) { Result.setOpcode(Hexagon::V4_SS2_storewi0); addOps(Result, Inst, 0); addOps(Result, 
Inst, 1); break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#0 - } else if (Inst.getOperand(2).getImm() == 1) { + } else if (Value == 1) { Result.setOpcode(Hexagon::V4_SS2_storewi1); addOps(Result, Inst, 0); addOps(Result, Inst, 1); @@ -983,7 +968,8 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { addOps(Result, Inst, 0); break; // 2 SUBInst if (p0) $Rd = #0 case Hexagon::A2_tfrsi: - if (Inst.getOperand(1).isImm() && Inst.getOperand(1).getImm() == -1) { + Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value); + if (Absolute && Value == -1) { Result.setOpcode(Hexagon::V4_SA1_setin1); addOps(Result, Inst, 0); break; // 2 1 SUBInst $Rd = #-1 @@ -1044,6 +1030,8 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII, << "\n"); bisReversable = false; } + if (HexagonMCInstrInfo::isMemReorderDisabled(MCB)) // }:mem_noshuf + bisReversable = false; // Try in order. if (isOrderedDuplexPair( diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index bf51c3515e95..eaa3550d07f6 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -37,9 +37,7 @@ static cl::opt<unsigned> void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK, const MCSubtargetInfo &STI) { - MCInst HMI; - HMI.setOpcode(Hexagon::BUNDLE); - HMI.addOperand(MCOperand::createImm(0)); + MCInst HMI = HexagonMCInstrInfo::createBundle(); MCInst *MCB; if (MCK.getOpcode() != Hexagon::BUNDLE) { @@ -50,7 +48,7 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK, // Examines packet and pad the packet, if needed, when an // end-loop is in the bundle. - HexagonMCInstrInfo::padEndloop(*MCB); + HexagonMCInstrInfo::padEndloop(getContext(), *MCB); HexagonMCShuffle(*MCII, STI, *MCB); assert(HexagonMCInstrInfo::bundleSize(*MCB) <= HEXAGON_PACKET_SIZE); @@ -60,9 +58,9 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK, if (Extended) { if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) { MCInst *SubInst = const_cast<MCInst *>(MCI->getOperand(1).getInst()); - HexagonMCInstrInfo::clampExtended(*MCII, *SubInst); + HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *SubInst); } else { - HexagonMCInstrInfo::clampExtended(*MCII, *MCI); + HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *MCI); } Extended = false; } else { @@ -114,7 +112,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol, MCSection *Section = getAssembler().getContext().getELFSection( SectionName, ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); SwitchSection(Section); - AssignSection(Symbol, Section); + AssignFragment(Symbol, getCurrentFragment()); MCELFStreamer::EmitCommonSymbol(Symbol, Size, ByteAlignment); SwitchSection(CrntSection); diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp new file mode 100644 index 000000000000..fc6262657514 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp @@ -0,0 +1,49 @@ +//===-- HexagonMCExpr.cpp - Hexagon specific MC expression classes +//----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonMCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-mcexpr" + +HexagonNoExtendOperand *HexagonNoExtendOperand::Create(MCExpr const *Expr, + MCContext &Ctx) { + return new (Ctx) HexagonNoExtendOperand(Expr); +} + +bool HexagonNoExtendOperand::evaluateAsRelocatableImpl( + MCValue &Res, MCAsmLayout const *Layout, MCFixup const *Fixup) const { + return Expr->evaluateAsRelocatable(Res, Layout, Fixup); +} + +void HexagonNoExtendOperand::visitUsedExpr(MCStreamer &Streamer) const {} + +MCFragment *llvm::HexagonNoExtendOperand::findAssociatedFragment() const { + return Expr->findAssociatedFragment(); +} + +void HexagonNoExtendOperand::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + +MCExpr const *HexagonNoExtendOperand::getExpr() const { return Expr; } + +bool HexagonNoExtendOperand::classof(MCExpr const *E) { + return E->getKind() == MCExpr::Target; +} + +HexagonNoExtendOperand::HexagonNoExtendOperand(MCExpr const *Expr) + : Expr(Expr) {} + +void HexagonNoExtendOperand::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + Expr->print(OS, MAI); +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h new file mode 100644 index 000000000000..60f180fb2bc4 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h @@ -0,0 +1,35 @@ +//==- HexagonMCExpr.h - Hexagon specific MC expression classes --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { +class MCInst; +class HexagonNoExtendOperand : public MCTargetExpr { +public: + static HexagonNoExtendOperand *Create(MCExpr const *Expr, MCContext &Ctx); + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + MCFragment *findAssociatedFragment() const override; + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; + static bool classof(MCExpr const *E); + MCExpr const *getExpr() const; + +private: + HexagonNoExtendOperand(MCExpr const *Expr); + MCExpr const *Expr; +}; +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 48b15f85a783..e6842076db2a 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -15,17 +15,37 @@ #include "Hexagon.h" #include "HexagonBaseInfo.h" +#include "HexagonMCChecker.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" namespace llvm { +void HexagonMCInstrInfo::addConstant(MCInst &MI, uint64_t Value, + MCContext &Context) { + MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(Value, Context))); +} + +void HexagonMCInstrInfo::addConstExtender(MCContext &Context, + MCInstrInfo const &MCII, MCInst &MCB, + MCInst const &MCI) { + assert(HexagonMCInstrInfo::isBundle(MCB)); + MCOperand const &exOp = + MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI)); + + // Create the extender. + MCInst *XMCI = + new (Context) MCInst(HexagonMCInstrInfo::deriveExtender(MCII, MCI, exOp)); + + MCB.addOperand(MCOperand::createInst(XMCI)); +} + iterator_range<MCInst::const_iterator> HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) { assert(isBundle(MCI)); - return iterator_range<MCInst::const_iterator>( - MCI.begin() + bundleInstructionsOffset, MCI.end()); + return make_range(MCI.begin() + bundleInstructionsOffset, MCI.end()); } size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) { @@ -35,7 +55,40 @@ size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) { return (1); } -void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, MCInst &MCI) { +bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, + MCContext &Context, MCInst &MCB, + HexagonMCChecker *Check) { + // Examine the packet and convert pairs of instructions to compound + // instructions when possible. + if (!HexagonDisableCompound) + HexagonMCInstrInfo::tryCompound(MCII, Context, MCB); + // Check the bundle for errors. + bool CheckOk = Check ? Check->check() : true; + if (!CheckOk) + return false; + HexagonMCShuffle(MCII, STI, MCB); + // Examine the packet and convert pairs of instructions to duplex + // instructions when possible. 
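+  // (A duplex fuses two eligible sub-instructions into a single 32-bit word, +  // freeing a packet slot; the legal pairings are enumerated by +  // getDuplexPossibilties() below.)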
+ MCInst InstBundlePreDuplex = MCInst(MCB); + if (!HexagonDisableDuplex) { + SmallVector<DuplexCandidate, 8> possibleDuplexes; + possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, MCB); + HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes); + } + // Examine the packet and pad it, if needed, when an + // end-loop is in the bundle. + HexagonMCInstrInfo::padEndloop(Context, MCB); + // If compounding and duplexing didn't reduce the size to + // 4 or fewer insns, we have a packet that is too big. + if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) + return false; + HexagonMCShuffle(MCII, STI, MCB); + return true; +} + +void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, + MCContext &Context, MCInst &MCI) { assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) || HexagonMCInstrInfo::isExtended(MCII, MCI)); MCOperand &exOp = @@ -43,13 +96,20 @@ void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, MCInst &MCI) { // If the extended value is a constant, then use it for the extended and // for the extender instructions, masking off the lower 6 bits and // including the assumed bits. - if (exOp.isImm()) { + int64_t Value; + if (exOp.getExpr()->evaluateAsAbsolute(Value)) { unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MCI); - int64_t Bits = exOp.getImm(); - exOp.setImm((Bits & 0x3f) << Shift); + exOp.setExpr(MCConstantExpr::create((Value & 0x3f) << Shift, Context)); } } +MCInst HexagonMCInstrInfo::createBundle() { + MCInst Result; + Result.setOpcode(Hexagon::BUNDLE); + Result.addOperand(MCOperand::createImm(0)); + return Result; +} + MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0, MCInst const &inst1) { @@ -64,6 +124,27 @@ MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass, return duplexInst; } +MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII, + MCInst const &Inst, + MCOperand const &MO) { + assert(HexagonMCInstrInfo::isExtendable(MCII, Inst) || + HexagonMCInstrInfo::isExtended(MCII, Inst)); + + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, Inst); + MCInst XMI; + XMI.setOpcode((Desc.isBranch() || Desc.isCall() || + HexagonMCInstrInfo::getType(MCII, Inst) == HexagonII::TypeCR) + ? Hexagon::A4_ext_b + : Hexagon::A4_ext); + if (MO.isImm()) + XMI.addOperand(MCOperand::createImm(MO.getImm() & (~0x3f))); + else if (MO.isExpr()) + XMI.addOperand(MCOperand::createExpr(MO.getExpr())); + else + llvm_unreachable("invalid extendable operand"); + return XMI; +} + MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB, size_t Index) { assert(Index <= bundleSize(MCB)); @@ -76,6 +157,13 @@ MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB, return nullptr; } +void HexagonMCInstrInfo::extendIfNeeded(MCContext &Context, + MCInstrInfo const &MCII, MCInst &MCB, + MCInst const &MCI, bool MustExtend) { + if (isConstExtended(MCII, MCI) || MustExtend) + addConstExtender(Context, MCII, MCB, MCI); +} + HexagonII::MemAccessSize HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; @@ -186,6 +274,25 @@ MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII, return (MCO); } +/// Return the new value or the newly produced value.
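+/// (That is, the operand index of the second value produced by two-output +/// insns such as vshuff(Vx,Vy,Rt) or vdeal(Vx,Vy,Rt); cf. hasNewValue2() +/// later in this file.)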
+unsigned short HexagonMCInstrInfo::getNewValueOp2(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::NewValueOpPos2) & HexagonII::NewValueOpMask2); +} + +MCOperand const & +HexagonMCInstrInfo::getNewValueOperand2(MCInstrInfo const &MCII, + MCInst const &MCI) { + unsigned O = HexagonMCInstrInfo::getNewValueOp2(MCII, MCI); + MCOperand const &MCO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) || + HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) && + MCO.isReg()); + return (MCO); +} + int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; @@ -242,6 +349,13 @@ bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII, return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); } +/// Return whether the insn produces a second value. +bool HexagonMCInstrInfo::hasNewValue2(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::hasNewValuePos2) & HexagonII::hasNewValueMask2); +} + MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) { assert(isBundle(MCB)); assert(Index < HEXAGON_PACKET_SIZE); @@ -261,6 +375,11 @@ bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) { HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP); } +bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII, + MCInst const &MCI) { + return (getType(MCII, MCI) == HexagonII::TypeCOMPOUND); +} + bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) { return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) || (Reg >= Hexagon::D8 && Reg <= Hexagon::D11)); @@ -282,14 +401,21 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI) { if (HexagonMCInstrInfo::isExtended(MCII, MCI)) return true; - - if (!HexagonMCInstrInfo::isExtendable(MCII, MCI)) + // Branch insns are handled as necessary by relaxation. + if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeJ) || + (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCOMPOUND && + HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()) || + (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV && + HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch())) + return false; + // Otherwise loop instructions and other CR insts are handled by relaxation + else if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) && + (MCI.getOpcode() != Hexagon::C4_addipc)) + return false; + else if (!HexagonMCInstrInfo::isExtendable(MCII, MCI)) return false; - short ExtOpNum = HexagonMCInstrInfo::getCExtOpNum(MCII, MCI); - int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI); - int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI); - MCOperand const &MO = MCI.getOperand(ExtOpNum); + MCOperand const &MO = HexagonMCInstrInfo::getExtendableOperand(MCII, MCI); // We could be using an instruction with an extendable immediate and shoehorn // a global address into it. If it is a global address it will be constant @@ -297,15 +423,13 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, // We currently only handle isGlobal() because it is the only kind of // object we are going to end up with here for now. // In the future we probably should add isSymbol(), etc. 
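+  // (If the expression folds to a known constant it can be range-checked +  // against the operand's extendable bounds below; otherwise an extender is +  // conservatively assumed to be required.)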
@@ -282,14 +401,21 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
                                          MCInst const &MCI) {
   if (HexagonMCInstrInfo::isExtended(MCII, MCI))
     return true;
-
-  if (!HexagonMCInstrInfo::isExtendable(MCII, MCI))
+  // Branch insns are handled as necessary by relaxation.
+  if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeJ) ||
+      (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCOMPOUND &&
+       HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()) ||
+      (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV &&
+       HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()))
+    return false;
+  // Otherwise loop instructions and other CR insns are handled by relaxation.
+  else if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) &&
+           (MCI.getOpcode() != Hexagon::C4_addipc))
+    return false;
+  else if (!HexagonMCInstrInfo::isExtendable(MCII, MCI))
     return false;
-
-  short ExtOpNum = HexagonMCInstrInfo::getCExtOpNum(MCII, MCI);
-  int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
-  int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
-  MCOperand const &MO = MCI.getOperand(ExtOpNum);
+  MCOperand const &MO = HexagonMCInstrInfo::getExtendableOperand(MCII, MCI);
 
   // We could be using an instruction with an extendable immediate and shoehorn
   // a global address into it. If it is a global address it will be constant
@@ -297,15 +423,13 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
   // We currently only handle isGlobal() because it is the only kind of
   // object we are going to end up with here for now.
   // In the future we probably should add isSymbol(), etc.
-  if (MO.isExpr())
+  assert(!MO.isImm());
+  int64_t Value;
+  if (!MO.getExpr()->evaluateAsAbsolute(Value))
     return true;
-
-  // If the extendable operand is not 'Immediate' type, the instruction should
-  // have 'isExtended' flag set.
-  assert(MO.isImm() && "Extendable operand must be Immediate type");
-
-  int ImmValue = MO.getImm();
-  return (ImmValue < MinValue || ImmValue > MaxValue);
+  int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
+  int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
+  return (MinValue > Value || Value > MaxValue);
 }
 
 bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII,
@@ -374,6 +498,19 @@ bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII,
   return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
 }
 
+bool HexagonMCInstrInfo::isPredicateLate(MCInstrInfo const &MCII,
+                                         MCInst const &MCI) {
+  const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return (F >> HexagonII::PredicateLatePos & HexagonII::PredicateLateMask);
+}
+
+/// Return whether the insn is newly predicated.
+bool HexagonMCInstrInfo::isPredicatedNew(MCInstrInfo const &MCII,
+                                         MCInst const &MCI) {
+  const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+}
+
 bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII,
                                           MCInst const &MCI) {
   const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -394,6 +531,18 @@ bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
   return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
 }
 
+bool HexagonMCInstrInfo::isMemReorderDisabled(MCInst const &MCI) {
+  assert(isBundle(MCI));
+  auto Flags = MCI.getOperand(0).getImm();
+  return (Flags & memReorderDisabledMask) != 0;
+}
+
+bool HexagonMCInstrInfo::isMemStoreReorderEnabled(MCInst const &MCI) {
+  assert(isBundle(MCI));
+  auto Flags = MCI.getOperand(0).getImm();
+  return (Flags & memStoreReorderEnabledMask) != 0;
+}
+
 bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) {
   const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
   return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask);
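isMemReorderDisabled and isMemStoreReorderEnabled read packet-wide flags out of operand 0 of the BUNDLE pseudo-instruction; the header hunk further down assigns the bits: inner-loop at bit 0, outer-loop at bit 1, mem-reorder-disabled at bit 2, mem-store-reorder-enabled at bit 3. A sketch of the same set/test pairs on a bare flag word, without the MCInst plumbing:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t innerLoopMask = 1 << 0;          // endloop0
      const int64_t memReorderDisabledMask = 1 << 2; // no load/store reorder

      int64_t Flags = 0;               // operand 0 of a fresh bundle
      Flags |= memReorderDisabledMask; // setMemReorderDisabled()

      assert((Flags & memReorderDisabledMask) != 0); // isMemReorderDisabled()
      assert((Flags & innerLoopMask) == 0);          // not an endloop0 packet
      return 0;
    }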
@@ -405,7 +554,28 @@ bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII,
   return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask);
 }
 
-void HexagonMCInstrInfo::padEndloop(MCInst &MCB) {
+bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) {
+  if ((getType(MCII, MCI) <= HexagonII::TypeCVI_LAST) &&
+      (getType(MCII, MCI) >= HexagonII::TypeCVI_FIRST))
+    return true;
+  return false;
+}
+
+int64_t HexagonMCInstrInfo::minConstant(MCInst const &MCI, size_t Index) {
+  auto Sentinal = static_cast<int64_t>(std::numeric_limits<uint32_t>::max())
+                  << 8;
+  if (MCI.size() <= Index)
+    return Sentinal;
+  MCOperand const &MCO = MCI.getOperand(Index);
+  if (!MCO.isExpr())
+    return Sentinal;
+  int64_t Value;
+  if (!MCO.getExpr()->evaluateAsAbsolute(Value))
+    return Sentinal;
+  return Value;
+}
+
+void HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) {
   MCInst Nop;
   Nop.setOpcode(Hexagon::A2_nop);
   assert(isBundle(MCB));
@@ -413,7 +583,7 @@ void HexagonMCInstrInfo::padEndloop(MCInst &MCB) {
        (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) ||
       ((HexagonMCInstrInfo::isOuterLoop(MCB) &&
         (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE))))
-    MCB.addOperand(MCOperand::createInst(new MCInst(Nop)));
+    MCB.addOperand(MCOperand::createInst(new (Context) MCInst(Nop)));
 }
 
 bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII,
@@ -456,6 +626,20 @@ void HexagonMCInstrInfo::setInnerLoop(MCInst &MCI) {
   Operand.setImm(Operand.getImm() | innerLoopMask);
 }
 
+void HexagonMCInstrInfo::setMemReorderDisabled(MCInst &MCI) {
+  assert(isBundle(MCI));
+  MCOperand &Operand = MCI.getOperand(0);
+  Operand.setImm(Operand.getImm() | memReorderDisabledMask);
+  assert(isMemReorderDisabled(MCI));
+}
+
+void HexagonMCInstrInfo::setMemStoreReorderEnabled(MCInst &MCI) {
+  assert(isBundle(MCI));
+  MCOperand &Operand = MCI.getOperand(0);
+  Operand.setImm(Operand.getImm() | memStoreReorderEnabledMask);
+  assert(isMemStoreReorderEnabled(MCI));
+}
+
 void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
   assert(isBundle(MCI));
   MCOperand &Operand = MCI.getOperand(0);
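createBundle in the hunks above fixes the in-memory shape of a packet: a BUNDLE MCInst whose operand 0 is the flag word (created as imm 0) and whose operands from index 1 onward (bundleInstructionsOffset in the header below) each wrap one sub-instruction. A rough sketch of walking that layout by hand; the real code uses the bundleInstructions() iterator-range helper instead:

    // Sketch only: count the sub-instructions of a Hexagon bundle.
    #include "llvm/MC/MCInst.h"

    static size_t countBundleInsts(llvm::MCInst const &MCB) {
      size_t N = 0;
      // Operand 0 is the packet flag word; the rest wrap nested MCInsts.
      for (unsigned I = 1; I < MCB.getNumOperands(); ++I)
        if (MCB.getOperand(I).isInst())
          ++N;
      return N;
    }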
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 32d61a4a7be5..0237b2884a3b 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -14,9 +14,11 @@
 #ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
 #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
 
+#include "HexagonMCExpr.h"
 #include "llvm/MC/MCInst.h"
 
 namespace llvm {
+class HexagonMCChecker;
 class MCContext;
 class MCInstrDesc;
 class MCInstrInfo;
@@ -39,20 +41,47 @@ int64_t const innerLoopMask = 1 << innerLoopOffset;
 size_t const outerLoopOffset = 1;
 int64_t const outerLoopMask = 1 << outerLoopOffset;
 
+// Disallow reordering of memory loads/stores in this packet;
+// by default, loads can be re-ordered.
+size_t const memReorderDisabledOffset = 2;
+int64_t const memReorderDisabledMask = 1 << memReorderDisabledOffset;
+
+// Allow re-ordering of memory stores; by default, stores cannot be
+// re-ordered.
+size_t const memStoreReorderEnabledOffset = 3;
+int64_t const memStoreReorderEnabledMask = 1 << memStoreReorderEnabledOffset;
+
 size_t const bundleInstructionsOffset = 1;
 
+void addConstant(MCInst &MI, uint64_t Value, MCContext &Context);
+void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+                      MCInst const &MCI);
+
 // Returns an iterator range of instructions in this bundle
 iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI);
 
 // Returns the number of instructions in the bundle
 size_t bundleSize(MCInst const &MCI);
 
+// Put the packet into canonical form: compound, duplex, pad, and shuffle
+bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+                        MCContext &Context, MCInst &MCB,
+                        HexagonMCChecker *Checker);
+
 // Clamp off upper 26 bits of extendable operand for emission
-void clampExtended(MCInstrInfo const &MCII, MCInst &MCI);
+void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
+
+MCInst createBundle();
+
+// Return the extender for instruction at Index or nullptr if none
+MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
+void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+                    MCInst const &MCI, bool MustExtend);
 
 // Create a duplex instruction given the two subinsts
 MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
                      MCInst const &inst1);
+MCInst deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst,
+                      MCOperand const &MO);
 
 // Convert this instruction into a duplex subinst
 MCInst deriveSubInst(MCInst const &Inst);
@@ -108,6 +137,9 @@ unsigned short
 getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Return the operand that consumes or produces a new value.
 MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI);
+unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI);
+MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII,
+                                     MCInst const &MCI);
 
 int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -125,6 +157,7 @@ bool hasImmExt(MCInst const &MCI);
 
 // Return whether the instruction is a legal new-value producer.
 bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Return the instruction at Index
 MCInst const &instruction(MCInst const &MCB, size_t Index);
@@ -134,10 +167,24 @@ bool isBundle(MCInst const &MCI);
 
 // Return whether the insn is an actual insn.
 bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Return the duplex iclass given the two duplex classes
 unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb);
 
+int64_t minConstant(MCInst const &MCI, size_t Index);
+template <unsigned N, unsigned S>
+bool inRange(MCInst const &MCI, size_t Index) {
+  return isShiftedUInt<N, S>(minConstant(MCI, Index));
+}
+template <unsigned N, unsigned S>
+bool inSRange(MCInst const &MCI, size_t Index) {
+  return isShiftedInt<N, S>(minConstant(MCI, Index));
+}
+template <unsigned N> bool inRange(MCInst const &MCI, size_t Index) {
+  return isUInt<N>(minConstant(MCI, Index));
+}
+
 // Return whether the instruction needs to be constant extended.
 bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
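minConstant and the inRange/inSRange templates above work together: when the operand at Index cannot be resolved to an absolute value, minConstant returns a sentinel, (int64_t)UINT32_MAX << 8, which lies far outside any immediate width these checks use, so the range test fails automatically. A sketch of that interplay with plain integers, re-implementing the semantics of isShiftedUInt<8, 2> by hand for illustration:

    #include <cassert>
    #include <cstdint>
    #include <limits>

    int main() {
      const int64_t Sentinel =
          static_cast<int64_t>(std::numeric_limits<uint32_t>::max()) << 8;

      // isShiftedUInt<8, 2>: an 8-bit unsigned value pre-shifted left by 2.
      auto InRange8s2 = [](int64_t V) {
        return V >= 0 && (V & 0x3) == 0 && (V >> 2) <= 0xff;
      };

      assert(InRange8s2(255 << 2));  // a resolvable, fitting immediate
      assert(!InRange8s2(Sentinel)); // unresolved operand: never in range
      return 0;
    }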
@@ -173,6 +220,8 @@ bool isIntReg(unsigned Reg);
 
 // Is this register suitable for use in a duplex subinst
 bool isIntRegForSubInst(unsigned Reg);
+bool isMemReorderDisabled(MCInst const &MCI);
+bool isMemStoreReorderEnabled(MCInst const &MCI);
 
 // Return whether the insn is a new-value consumer.
 bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -191,6 +240,8 @@ bool isOuterLoop(MCInst const &MCI);
 
 // Return whether this instruction is predicated
 bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isPredicateLate(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isPredicatedNew(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Return whether the predicate sense is true
 bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -209,9 +260,10 @@ bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI);
 
 /// Return whether the insn can be packaged only with an A-type insn in slot #1.
 bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isVector(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Pad the bundle with nops to satisfy endloop requirements
-void padEndloop(MCInst &MCI);
+void padEndloop(MCContext &Context, MCInst &MCI);
 
 bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -220,6 +272,8 @@ void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate);
 
 // Marks a bundle as endloop0
 void setInnerLoop(MCInst &MCI);
+void setMemReorderDisabled(MCInst &MCI);
+void setMemStoreReorderEnabled(MCInst &MCI);
 
 // Marks a bundle as endloop1
 void setOuterLoop(MCInst &MCI);
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 53305d85fd80..9a292577a8f3 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -40,6 +40,20 @@ using namespace llvm;
 #define GET_REGINFO_MC_DESC
 #include "HexagonGenRegisterInfo.inc"
 
+cl::opt<bool> llvm::HexagonDisableCompound
+    ("mno-compound",
+     cl::desc("Disable looking for compound instructions for Hexagon"));
+
+cl::opt<bool> llvm::HexagonDisableDuplex
+    ("mno-pairing",
+     cl::desc("Disable looking for duplex instructions for Hexagon"));
+
+StringRef HEXAGON_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) {
+  if (CPU.empty())
+    CPU = "hexagonv60";
+  return CPU;
+}
+
 MCInstrInfo *llvm::createHexagonMCInstrInfo() {
   MCInstrInfo *X = new MCInstrInfo();
   InitHexagonMCInstrInfo(X);
@@ -54,6 +68,7 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
 
 static MCSubtargetInfo *
 createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+  CPU = HEXAGON_MC::selectHexagonCPU(TT, CPU);
   return createHexagonMCSubtargetInfoImpl(TT, CPU, FS);
 }
 
@@ -76,28 +91,23 @@ public:
     StringRef Contents(Buffer);
     auto PacketBundle = Contents.rsplit('\n');
     auto HeadTail = PacketBundle.first.split('\n');
-    auto Preamble = "\t{\n\t\t";
-    auto Separator = "";
-    while(!HeadTail.first.empty()) {
-      OS << Separator;
-      StringRef Inst;
+    StringRef Separator = "\n";
+    StringRef Indent = "\t\t";
+    OS << "\t{\n";
+    while (!HeadTail.first.empty()) {
+      StringRef InstTxt;
       auto Duplex = HeadTail.first.split('\v');
-      if(!Duplex.second.empty()){
-        OS << Duplex.first << "\n";
-        Inst = Duplex.second;
-      }
-      else {
-        if(!HeadTail.first.startswith("immext"))
-          Inst = Duplex.first;
+      if (!Duplex.second.empty()) {
+        OS << Indent << Duplex.first << Separator;
+        InstTxt = Duplex.second;
+      } else if (!HeadTail.first.trim().startswith("immext")) {
+        InstTxt = Duplex.first;
       }
-      OS << Preamble;
-      OS << Inst;
+      if (!InstTxt.empty())
+        OS << Indent << InstTxt << Separator;
       HeadTail = HeadTail.second.split('\n');
-      Preamble = "";
-      Separator = "\n\t\t";
     }
-    if(HexagonMCInstrInfo::bundleSize(Inst) != 0)
-      OS << "\n\t}" << PacketBundle.second;
+    OS << "\t}" << PacketBundle.second;
   }
 };
 }
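The rewritten loop above is the packet pretty-printer: instructions arrive newline-separated in Buffer, a duplex carries its two sub-instructions glued with '\v', and the group is wrapped in the braces of Hexagon packet syntax. A simplified model of the duplex split, using std::string in place of StringRef; the instruction text is invented:

    #include <iostream>
    #include <string>

    int main() {
      // '\v' joins the two sub-insns of a duplex, as in the streamer above.
      std::string Line = "r0 = #1\vr1 = #2"; // hypothetical duplex pair

      std::cout << "\t{\n";
      std::string::size_type VPos = Line.find('\v');
      if (VPos != std::string::npos) {
        std::cout << "\t\t" << Line.substr(0, VPos) << "\n"
                  << "\t\t" << Line.substr(VPos + 1) << "\n";
      } else {
        std::cout << "\t\t" << Line << "\n";
      }
      std::cout << "\t}\n";
      return 0;
    }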
@@ -154,9 +164,9 @@ static MCCodeGenInfo *createHexagonMCCodeGenInfo(const Triple &TT,
                                                  CodeModel::Model CM,
                                                  CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
-  // For the time being, use static relocations, since there's really no
-  // support for PIC yet.
-  X->initMCCodeGenInfo(Reloc::Static, CM, OL);
+  if (RM == Reloc::Default)
+    RM = Reloc::Static;
+  X->initMCCodeGenInfo(RM, CM, OL);
   return X;
 }
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index cb626503313f..a005a014416b 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -16,6 +16,8 @@
 
 #include <cstdint>
 
+#include "llvm/Support/CommandLine.h"
+
 namespace llvm {
 struct InstrItinerary;
 struct InstrStage;
@@ -33,22 +35,27 @@ class raw_ostream;
 class raw_pwrite_stream;
 
 extern Target TheHexagonTarget;
-
+extern cl::opt<bool> HexagonDisableCompound;
+extern cl::opt<bool> HexagonDisableDuplex;
 extern const InstrStage HexagonStages[];
 
 MCInstrInfo *createHexagonMCInstrInfo();
 
-MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
-                                          MCRegisterInfo const &MRI,
+MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
+                                          const MCRegisterInfo &MRI,
                                           MCContext &MCT);
 
-MCAsmBackend *createHexagonAsmBackend(Target const &T,
-                                      MCRegisterInfo const &MRI,
+MCAsmBackend *createHexagonAsmBackend(const Target &T,
+                                      const MCRegisterInfo &MRI,
                                       const Triple &TT, StringRef CPU);
 
 MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
                                              uint8_t OSABI, StringRef CPU);
 
+namespace HEXAGON_MC {
+  StringRef selectHexagonCPU(const Triple &TT, StringRef CPU);
+}
+
 } // End llvm namespace
 
 // Define symbolic names for Hexagon registers. This defines a mapping from
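selectHexagonCPU, declared in the HEXAGON_MC namespace above and defined earlier in HexagonMCTargetDesc.cpp, supplies the default CPU: an empty -mcpu request now comes back as "hexagonv60", while an explicit one passes through untouched. A sketch of a call site, assuming the in-tree include path; the wrapper function is hypothetical:

    #include "MCTargetDesc/HexagonMCTargetDesc.h"
    #include "llvm/ADT/Triple.h"

    using namespace llvm;

    static StringRef pickCPU(StringRef Requested) {
      Triple TT("hexagon-unknown-elf");
      // Empty -> "hexagonv60"; explicit names pass through unchanged.
      return HEXAGON_MC::selectHexagonCPU(TT, Requested);
    }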
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 41112ac0b46e..6ceb848ba20c 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -27,6 +27,7 @@
 
 using namespace llvm;
 
+namespace {
 // Insn shuffling priority.
 class HexagonBid {
   // The priority is directly proportional to how restricted the insn is based
@@ -75,6 +76,7 @@ public:
     return false;
   };
 };
+} // end anonymous namespace
 
 unsigned HexagonResource::setWeight(unsigned s) {
   const unsigned SlotWeight = 8;
@@ -93,6 +95,60 @@ unsigned HexagonResource::setWeight(unsigned s) {
   return (Weight);
 }
 
+HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL;
+
+bool HexagonCVIResource::SetUp = HexagonCVIResource::setup();
+
+bool HexagonCVIResource::setup() {
+  assert(!TUL);
+  TUL = new (TypeUnitsAndLanes);
+
+  (*TUL)[HexagonII::TypeCVI_VA] =
+      UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+  (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2);
+  (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1);
+  (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2);
+  (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1);
+  (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2);
+  (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1);
+  (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1);
+  (*TUL)[HexagonII::TypeCVI_VM_LD] =
+      UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+  (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0);
+  (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] =
+      UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+  (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1);
+  (*TUL)[HexagonII::TypeCVI_VM_ST] =
+      UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+  (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0);
+  (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1);
+  (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4);
+
+  return true;
+}
+
+HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s,
+                                       MCInst const *id)
+    : HexagonResource(s) {
+  unsigned T = HexagonMCInstrInfo::getType(MCII, *id);
+
+  if (TUL->count(T)) {
+    // For an HVX insn.
+    Valid = true;
+    setUnits((*TUL)[T].first);
+    setLanes((*TUL)[T].second);
+    setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad());
+    setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore());
+  } else {
+    // For core insns.
+    Valid = false;
+    setUnits(0);
+    setLanes(0);
+    setLoad(false);
+    setStore(false);
+  }
+}
+
 HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
                                  MCSubtargetInfo const &STI)
     : MCII(MCII), STI(STI) {
@@ -107,7 +163,7 @@ void HexagonShuffler::reset() {
 
 void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender,
                              unsigned S, bool X) {
-  HexagonInstr PI(ID, Extender, S, X);
+  HexagonInstr PI(MCII, ID, Extender, S, X);
 
   Packet.push_back(PI);
 }
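The TUL table above assigns each HVX instruction class a (unit mask, lane count) pair: the mask lists which of the four CVI resources (XLANE, SHIFT, MPY0, MPY1) may host the insn, and the lane count says how many adjacent resources a wide form occupies, up to all four for TypeCVI_HIST. A sketch of storing and reading such a pair, with the same bit assignments but a local map:

    #include <cstdio>
    #include <map>
    #include <utility>

    enum {
      CVI_XLANE = 1 << 0, CVI_SHIFT = 1 << 1,
      CVI_MPY0  = 1 << 2, CVI_MPY1  = 1 << 3
    };

    int main() {
      // (allowed-units mask, adjacent lanes needed), as in the TUL table.
      std::map<unsigned, std::pair<unsigned, unsigned>> TUL;
      TUL[0] = {CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1}; // VA-like
      TUL[1] = {CVI_MPY0, 2};                                    // VX_DV-like

      std::printf("units=0x%x lanes=%u\n", TUL[1].first, TUL[1].second);
      return 0;
    }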
@@ -126,6 +182,8 @@ bool HexagonShuffler::check() {
   // Number of memory operations, loads, solo loads, stores, solo stores, single
   // stores.
   unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
+  // Number of HVX loads, HVX stores.
+  unsigned CVIloads = 0, CVIstores = 0;
   // Number of duplex insns, solo insns.
   unsigned duplex = 0, solo = 0;
   // Number of insns restricting other insns in the packet to A and X types,
@@ -168,6 +226,12 @@ bool HexagonShuffler::check() {
     case HexagonII::TypeJ:
       ++jumps;
       break;
+    case HexagonII::TypeCVI_VM_VP_LDU:
+      ++onlyNo1;
+    case HexagonII::TypeCVI_VM_LD:
+    case HexagonII::TypeCVI_VM_TMP_LD:
+    case HexagonII::TypeCVI_VM_CUR_LD:
+      ++CVIloads;
     case HexagonII::TypeLD:
       ++loads;
       ++memory;
@@ -176,6 +240,11 @@ bool HexagonShuffler::check() {
       if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn())
         ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
       break;
+    case HexagonII::TypeCVI_VM_STU:
+      ++onlyNo1;
+    case HexagonII::TypeCVI_VM_ST:
+    case HexagonII::TypeCVI_VM_NEW_ST:
+      ++CVIstores;
     case HexagonII::TypeST:
       ++stores;
       ++memory;
@@ -203,9 +272,9 @@ bool HexagonShuffler::check() {
   }
 
   // Check if the packet is legal.
-  if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory)) ||
-      (solo && size() > 1) || (onlyAX && neitherAnorX > 1) ||
-      (onlyAX && xtypeFloat)) {
+  if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) ||
+      (duplex > 1 || (duplex && memory)) || (solo && size() > 1) ||
+      (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) {
     Error = SHUFFLE_ERROR_INVALID;
     return false;
   }
@@ -336,6 +405,19 @@ bool HexagonShuffler::check() {
       return false;
     }
   }
+  // Verify the CVI slot subscriptions.
+  {
+    HexagonUnitAuction AuctionCVI;
+
+    std::sort(begin(), end(), HexagonInstr::lessCVI);
+
+    for (iterator I = begin(); I != end(); ++I)
+      for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid?
+        if (!AuctionCVI.bid(I->CVI.getUnits() << i)) {
+          Error = SHUFFLE_ERROR_SLOTS;
+          return false;
+        }
+  }
 
   Error = SHUFFLE_SUCCESS;
   return true;
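The new block above runs the same auction scheme already used for core slots: instructions are sorted by lessCVI so the most constrained unit masks bid first, and each lane of an insn bids its unit mask shifted by the lane index, failing when every requested unit is taken. A toy auction over a four-unit resource word; HexagonUnitAuction's real bookkeeping is richer than this:

    #include <cassert>

    // Toy auction: each bid claims the lowest free unit from its mask.
    struct Auction {
      unsigned Sold = 0;
      bool bid(unsigned Mask) {
        unsigned Free = Mask & ~Sold;
        if (!Free)
          return false;             // every requested unit already taken
        Sold |= Free & (~Free + 1); // claim the lowest free unit
        return true;
      }
    };

    int main() {
      Auction A;
      assert(A.bid(0x4));  // an MPY0-only insn takes unit 2
      assert(A.bid(0xf));  // a flexible insn still finds a free unit
      assert(!A.bid(0x4)); // a second MPY0-only insn has no slot left
      return 0;
    }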
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 8b6c72ee25e6..174f10fb2580 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -51,6 +51,44 @@ public:
   };
 };
 
+// HVX insn resources.
+class HexagonCVIResource : public HexagonResource {
+  typedef std::pair<unsigned, unsigned> UnitsAndLanes;
+  typedef llvm::DenseMap<unsigned, UnitsAndLanes> TypeUnitsAndLanes;
+
+  // Available HVX slots.
+  enum {
+    CVI_NONE = 0,
+    CVI_XLANE = 1 << 0,
+    CVI_SHIFT = 1 << 1,
+    CVI_MPY0 = 1 << 2,
+    CVI_MPY1 = 1 << 3
+  };
+
+  static bool SetUp;
+  static bool setup();
+  static TypeUnitsAndLanes *TUL;
+
+  // Count of adjacent slots that the insn requires to be executed.
+  unsigned Lanes;
+  // Flag whether the insn is a load or a store.
+  bool Load, Store;
+  // Flag whether the HVX resources are valid.
+  bool Valid;
+
+  void setLanes(unsigned l) { Lanes = l; };
+  void setLoad(bool f = true) { Load = f; };
+  void setStore(bool f = true) { Store = f; };
+
+public:
+  HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id);
+
+  bool isValid() const { return (Valid); };
+  unsigned getLanes() const { return (Lanes); };
+  bool mayLoad() const { return (Load); };
+  bool mayStore() const { return (Store); };
+};
+
 // Handle to an insn used by the shuffling algorithm.
 class HexagonInstr {
   friend class HexagonShuffler;
@@ -58,12 +96,14 @@ class HexagonInstr {
   MCInst const *ID;
   MCInst const *Extender;
   HexagonResource Core;
+  HexagonCVIResource CVI;
 
   bool SoloException;
 
 public:
-  HexagonInstr(MCInst const *id, MCInst const *Extender, unsigned s,
-               bool x = false)
-      : ID(id), Extender(Extender), Core(s), SoloException(x){};
+  HexagonInstr(MCInstrInfo const &MCII, MCInst const *id,
+               MCInst const *Extender, unsigned s, bool x = false)
+      : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id),
+        SoloException(x){};
 
   MCInst const *getDesc() const { return (ID); };
 
@@ -79,6 +119,10 @@ public:
   static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) {
     return (HexagonResource::lessUnits(A.Core, B.Core));
   };
+  // Check if the handles are in ascending order by HVX slots.
+  static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B) {
+    return (HexagonResource::lessUnits(A.CVI, B.CVI));
+  };
 };
 
 // Bundle shuffler.
@@ -108,6 +152,8 @@ public:
     SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns.
     SHUFFLE_ERROR_NOSLOTS,  ///< No free slots for other insns.
     SHUFFLE_ERROR_SLOTS,    ///< Over-subscribed slots.
+    SHUFFLE_ERROR_ERRATA2,  ///< Errata violation (v60).
+    SHUFFLE_ERROR_STORE_LOAD_CONFLICT, ///< Store/load conflict.
     SHUFFLE_ERROR_UNKNOWN   ///< Unknown error.
   };