Diffstat (limited to 'contrib/llvm/lib/Target/Hexagon')
67 files changed, 29453 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
new file mode 100644
index 000000000000..5467ee361257
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
@@ -0,0 +1,82 @@
+//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Hexagon back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_Hexagon_H
+#define TARGET_Hexagon_H
+
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+  class FunctionPass;
+  class ModulePass;
+  class TargetMachine;
+  class MachineInstr;
+  class HexagonMCInst;
+  class HexagonAsmPrinter;
+  class HexagonTargetMachine;
+  class raw_ostream;
+
+  FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+                                     CodeGenOpt::Level OptLevel);
+  FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM);
+  FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM);
+  FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
+  FunctionPass *createHexagonCFGOptimizer(const HexagonTargetMachine &TM);
+
+  FunctionPass *createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM);
+  FunctionPass *createHexagonSplitConst32AndConst64(
+      const HexagonTargetMachine &TM);
+  FunctionPass *createHexagonExpandPredSpillCode(
+      const HexagonTargetMachine &TM);
+  FunctionPass *createHexagonHardwareLoops();
+  FunctionPass *createHexagonPeephole();
+  FunctionPass *createHexagonFixupHwLoops();
+  FunctionPass *createHexagonNewValueJump();
+  FunctionPass *createHexagonCopyToCombine();
+  FunctionPass *createHexagonPacketizer();
+
+/* TODO: object output.
+  MCCodeEmitter *createHexagonMCCodeEmitter(const Target &,
+                                            const TargetMachine &TM,
+                                            MCContext &Ctx);
+*/
+/* TODO: assembler input.
+  TargetAsmBackend *createHexagonAsmBackend(const Target &,
+                                            const std::string &);
+*/
+  void HexagonLowerToMC(const MachineInstr *MI, HexagonMCInst &MCI,
+                        HexagonAsmPrinter &AP);
+} // end namespace llvm
+
+#define Hexagon_POINTER_SIZE 4
+
+#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
+#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
+#define Hexagon_WordSize Hexagon_PointerSize
+#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
+
+// allocframe saves LR and FP on stack before allocating
+// a new stack frame. This takes 8 bytes.
+#define HEXAGON_LRFP_SIZE 8
+
+// Normal instruction size (in bytes).
+#define HEXAGON_INSTR_SIZE 4
+
+// Maximum number of words and instructions in a packet.
+#define HEXAGON_PACKET_SIZE 4
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
new file mode 100644
index 000000000000..568798c3a412
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
@@ -0,0 +1,225 @@
+//===-- Hexagon.td - Describe the Hexagon Target Machine --*- tablegen -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This is the top level entry point for the Hexagon target. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Hexagon Subtarget features. +//===----------------------------------------------------------------------===// + +// Hexagon Archtectures +def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2", + "Hexagon v2">; +def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3", + "Hexagon v3">; +def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4", + "Hexagon v4">; +def ArchV5 : SubtargetFeature<"v5", "HexagonArchVersion", "V5", + "Hexagon v5">; + +//===----------------------------------------------------------------------===// +// Hexagon Instruction Predicate Definitions. +//===----------------------------------------------------------------------===// +def HasV2T : Predicate<"Subtarget.hasV2TOps()">; +def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">; +def NoV2T : Predicate<"!Subtarget.hasV2TOps()">; +def HasV3T : Predicate<"Subtarget.hasV3TOps()">; +def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">; +def NoV3T : Predicate<"!Subtarget.hasV3TOps()">; +def HasV4T : Predicate<"Subtarget.hasV4TOps()">; +def NoV4T : Predicate<"!Subtarget.hasV4TOps()">; +def HasV5T : Predicate<"Subtarget.hasV5TOps()">; +def NoV5T : Predicate<"!Subtarget.hasV5TOps()">; +def UseMEMOP : Predicate<"Subtarget.useMemOps()">; +def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">; + +//===----------------------------------------------------------------------===// +// Classes used for relation maps. +//===----------------------------------------------------------------------===// +// PredRel - Filter class used to relate non-predicated instructions with their +// predicated forms. +class PredRel; +// PredNewRel - Filter class used to relate predicated instructions with their +// predicate-new forms. +class PredNewRel: PredRel; +// ImmRegRel - Filter class used to relate instructions having reg-reg form +// with their reg-imm counterparts. +class ImmRegRel; +// NewValueRel - Filter class used to relate regular store instructions with +// their new-value store form. +class NewValueRel: PredNewRel; +// NewValueRel - Filter class used to relate load/store instructions having +// different addressing modes with each other. +class AddrModeRel: NewValueRel; + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate non-predicate instructions with their +// predicated formats - true and false. +// + +def getPredOpcode : InstrMapping { + let FilterClass = "PredRel"; + // Instructions with the same BaseOpcode and isNVStore values form a row. + let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"]; + // Instructions with the same predicate sense form a column. + let ColFields = ["PredSense"]; + // The key column is the unpredicated instructions. 
+ let KeyCol = [""]; + // Value columns are PredSense=true and PredSense=false + let ValueCols = [["true"], ["false"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate predicate-true instructions with their +// predicate-false forms +// +def getFalsePredOpcode : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"]; + let ColFields = ["PredSense"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate predicate-false instructions with their +// predicate-true forms +// +def getTruePredOpcode : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"]; + let ColFields = ["PredSense"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate predicated instructions with their .new +// format. +// +def getPredNewOpcode : InstrMapping { + let FilterClass = "PredNewRel"; + let RowFields = ["BaseOpcode", "PredSense", "isNVStore", "isBrTaken"]; + let ColFields = ["PNewValue"]; + let KeyCol = [""]; + let ValueCols = [["new"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate .new predicated instructions with their old +// format. +// +def getPredOldOpcode : InstrMapping { + let FilterClass = "PredNewRel"; + let RowFields = ["BaseOpcode", "PredSense", "isNVStore"]; + let ColFields = ["PNewValue"]; + let KeyCol = ["new"]; + let ValueCols = [[""]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate store instructions with their new-value +// format. +// +def getNewValueOpcode : InstrMapping { + let FilterClass = "NewValueRel"; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue"]; + let ColFields = ["NValueST"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate new-value store instructions with their old +// format. 
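+// (Editorial sketch of the mechanism, with a hypothetical opcode name: each
+// InstrMapping in this file becomes a generated C++ lookup function, e.g.
+// getPredNewOpcode above yields
+//     int getPredNewOpcode(uint16_t Opcode);
+// so a predicated instruction tagged BaseOpcode = "STrib", PNewValue = ""
+// maps to its row-mate whose PNewValue column is "new". Only the mechanism
+// is asserted here, not the names.)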
+// +def getNonNVStore : InstrMapping { + let FilterClass = "NewValueRel"; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue"]; + let ColFields = ["NValueST"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +def getBasedWithImmOffset : InstrMapping { + let FilterClass = "AddrModeRel"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore", + "isMEMri", "isFloat"]; + let ColFields = ["addrMode"]; + let KeyCol = ["Absolute"]; + let ValueCols = [["BaseImmOffset"]]; +} + +def getBaseWithRegOffset : InstrMapping { + let FilterClass = "AddrModeRel"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["addrMode"]; + let KeyCol = ["BaseImmOffset"]; + let ValueCols = [["BaseRegOffset"]]; +} + +def getRegForm : InstrMapping { + let FilterClass = "ImmRegRel"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue"]; + let ColFields = ["InputType"]; + let KeyCol = ["imm"]; + let ValueCols = [["reg"]]; +} + +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// +include "HexagonSchedule.td" +include "HexagonRegisterInfo.td" +include "HexagonCallingConv.td" +include "HexagonInstrInfo.td" +include "HexagonIntrinsics.td" +include "HexagonIntrinsicsDerived.td" + +def HexagonInstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Hexagon processors supported. +//===----------------------------------------------------------------------===// + +class Proc<string Name, SchedMachineModel Model, + list<SubtargetFeature> Features> + : ProcessorModel<Name, Model, Features>; + +def : Proc<"hexagonv2", HexagonModel, [ArchV2]>; +def : Proc<"hexagonv3", HexagonModel, [ArchV2, ArchV3]>; +def : Proc<"hexagonv4", HexagonModelV4, [ArchV2, ArchV3, ArchV4]>; +def : Proc<"hexagonv5", HexagonModelV4, [ArchV2, ArchV3, ArchV4, ArchV5]>; + + +// Hexagon Uses the MC printer for assembler output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. +def HexagonAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def Hexagon : Target { + // Pull in Instruction Info: + let InstructionSet = HexagonInstrInfo; + + let AssemblyWriters = [HexagonAsmWriter]; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp new file mode 100644 index 000000000000..a2e04baea76a --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -0,0 +1,314 @@ +//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to Hexagon assembly language. This printer is +// the output mechanism used by `llc'. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "Hexagon.h" +#include "HexagonAsmPrinter.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "MCTargetDesc/HexagonMCInst.h" +#include "InstPrinter/HexagonInstPrinter.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +static cl::opt<bool> AlignCalls( + "hexagon-align-calls", cl::Hidden, cl::init(true), + cl::desc("Insert falign after call instruction for Hexagon target")); + +void HexagonAsmPrinter::EmitAlignment(unsigned NumBits, + const GlobalValue *GV) const { + // For basic block level alignment, use ".falign". + if (!GV) { + OutStreamer.EmitRawText(StringRef("\t.falign")); + return; + } + + AsmPrinter::EmitAlignment(NumBits, GV); +} + +void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + + switch (MO.getType()) { + default: llvm_unreachable ("<unknown operand type>"); + case MachineOperand::MO_Register: + O << HexagonInstPrinter::getRegisterName(MO.getReg()); + return; + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + case MachineOperand::MO_JumpTableIndex: + O << *GetJTISymbol(MO.getIndex()); + // FIXME: PIC relocation model. + return; + case MachineOperand::MO_ConstantPoolIndex: + O << *GetCPISymbol(MO.getIndex()); + return; + case MachineOperand::MO_ExternalSymbol: + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + return; + case MachineOperand::MO_GlobalAddress: + // Computing the address of a global symbol, not calling it. + O << *getSymbol(MO.getGlobal()); + printOffset(MO.getOffset(), O); + return; + } +} + +// +// isBlockOnlyReachableByFallthrough - We need to override this since the +// default AsmPrinter does not print labels for any basic block that +// is only reachable by a fall through. That works for all cases except +// for the case in which the basic block is reachable by a fall through but +// through an indirect from a jump table. In this case, the jump table +// will contain a label not defined by AsmPrinter. 
+// +bool HexagonAsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + if (MBB->hasAddressTaken()) { + return false; + } + return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); +} + + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS); + case 'c': // Don't print "$" before a global var name or constant. + // Hexagon never has a prefix. + printOperand(MI, OpNo, OS); + return false; + case 'L': // Write second word of DImode reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part. + break; + case 'I': + // Write 'i' if an integer constant, otherwise nothing. Used to print + // addi vs add, etc. + if (MI->getOperand(OpNo).isImm()) + OS << "i"; + return false; + } + } + + printOperand(MI, OpNo, OS); + return false; +} + +bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + const MachineOperand &Base = MI->getOperand(OpNo); + const MachineOperand &Offset = MI->getOperand(OpNo+1); + + if (Base.isReg()) + printOperand(MI, OpNo, O); + else + llvm_unreachable("Unimplemented"); + + if (Offset.isImm()) { + if (Offset.getImm()) + O << " + #" << Offset.getImm(); + } + else + llvm_unreachable("Unimplemented"); + + return false; +} + +void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI, + unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Unimplemented"); +} + + +/// printMachineInstruction -- Print out a single Hexagon MI in Darwin syntax to +/// the current output stream. +/// +void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { + if (MI->isBundle()) { + std::vector<const MachineInstr*> BundleMIs; + + const MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock::const_instr_iterator MII = MI; + ++MII; + unsigned int IgnoreCount = 0; + while (MII != MBB->end() && MII->isInsideBundle()) { + const MachineInstr *MInst = MII; + if (MInst->getOpcode() == TargetOpcode::DBG_VALUE || + MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) { + IgnoreCount++; + ++MII; + continue; + } + //BundleMIs.push_back(&*MII); + BundleMIs.push_back(MInst); + ++MII; + } + unsigned Size = BundleMIs.size(); + assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!"); + for (unsigned Index = 0; Index < Size; Index++) { + HexagonMCInst MCI; + MCI.setPacketStart(Index == 0); + MCI.setPacketEnd(Index == (Size-1)); + + HexagonLowerToMC(BundleMIs[Index], MCI, *this); + OutStreamer.EmitInstruction(MCI); + } + } + else { + HexagonMCInst MCI; + if (MI->getOpcode() == Hexagon::ENDLOOP0) { + MCI.setPacketStart(true); + MCI.setPacketEnd(true); + } + HexagonLowerToMC(MI, MCI, *this); + OutStreamer.EmitInstruction(MCI); + } + + return; +} + +/// PrintUnmangledNameSafely - Print out the printable characters in the name. 
+/// Don't print things like \n or \0. +// static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { +// for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); +// Name != E; ++Name) +// if (isprint(*Name)) +// OS << *Name; +// } + + +void HexagonAsmPrinter::printAddrModeBasePlusOffset(const MachineInstr *MI, + int OpNo, raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << HexagonInstPrinter::getRegisterName(MO1.getReg()) + << " + #" + << MO2.getImm(); +} + + +void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + assert( (MO.getType() == MachineOperand::MO_GlobalAddress) && + "Expecting global address"); + + O << *getSymbol(MO.getGlobal()); + if (MO.getOffset() != 0) { + O << " + "; + O << MO.getOffset(); + } +} + +void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) && + "Expecting jump table index"); + + // Hexagon_TODO: Do we need name mangling? + O << *GetJTISymbol(MO.getIndex()); +} + +void HexagonAsmPrinter::printConstantPool(const MachineInstr *MI, int OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + assert( (MO.getType() == MachineOperand::MO_ConstantPoolIndex) && + "Expecting constant pool index"); + + // Hexagon_TODO: Do we need name mangling? + O << *GetCPISymbol(MO.getIndex()); +} + +static MCInstPrinter *createHexagonMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return(new HexagonInstPrinter(MAI, MII, MRI)); + else + return NULL; +} + +extern "C" void LLVMInitializeHexagonAsmPrinter() { + RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget); + + TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, + createHexagonMCInstPrinter); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h new file mode 100644 index 000000000000..bc2af636124c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -0,0 +1,165 @@ +//===-- HexagonAsmPrinter.h - Print machine code to an Hexagon .s file ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Hexagon Assembly printer class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONASMPRINTER_H +#define HEXAGONASMPRINTER_H + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + class HexagonAsmPrinter : public AsmPrinter { + const HexagonSubtarget *Subtarget; + + public: + explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget<HexagonSubtarget>(); + } + + virtual const char *getPassName() const { + return "Hexagon Assembly Printer"; + } + + bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; + + virtual void EmitInstruction(const MachineInstr *MI); + virtual void EmitAlignment(unsigned NumBits, + const GlobalValue *GV = 0) const; + + void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS); + + /// printInstruction - This method is automatically generated by tablegen + /// from the instruction set description. This method returns true if the + /// machine instruction was sufficiently described to print it, otherwise it + /// returns false. + void printInstruction(const MachineInstr *MI, raw_ostream &O); + + // void printMachineInstruction(const MachineInstr *MI); + void printOp(const MachineOperand &MO, raw_ostream &O); + + /// printRegister - Print register according to target requirements. + /// + void printRegister(const MachineOperand &MO, bool R0AsZero, + raw_ostream &O) { + unsigned RegNo = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??"); + O << getRegisterName(RegNo); + } + + void printImmOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + int value = MI->getOperand(OpNo).getImm(); + O << value; + } + + void printNegImmOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + int value = MI->getOperand(OpNo).getImm(); + O << -value; + } + + void printMEMriOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << getRegisterName(MO1.getReg()) + << " + #" + << (int) MO2.getImm(); + } + + void printFrameIndexOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << getRegisterName(MO1.getReg()) + << ", #" + << MO2.getImm(); + } + + void printBranchOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + // Branches can take an immediate operand. This is used by the branch + // selection pass to print $+8, an eight byte displacement from the PC. 
+      if (MI->getOperand(OpNo).isImm()) {
+        O << "$+" << MI->getOperand(OpNo).getImm()*4;
+      } else {
+        printOp(MI->getOperand(OpNo), O);
+      }
+    }
+
+    void printCallOperand(const MachineInstr *MI, unsigned OpNo,
+                          raw_ostream &O) {
+    }
+
+    void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo,
+                             raw_ostream &O) {
+    }
+
+    void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+      O << "#HI(";
+      if (MI->getOperand(OpNo).isImm()) {
+        printImmOperand(MI, OpNo, O);
+      }
+      else {
+        printOp(MI->getOperand(OpNo), O);
+      }
+      O << ")";
+    }
+
+    void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+      O << "#LO(";
+      if (MI->getOperand(OpNo).isImm()) {
+        printImmOperand(MI, OpNo, O);
+      }
+      else {
+        printOp(MI->getOperand(OpNo), O);
+      }
+      O << ")";
+    }
+
+    void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+                               raw_ostream &O);
+
+#if 0
+    void printModuleLevelGV(const GlobalVariable* GVar, raw_ostream &O);
+#endif
+
+    void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo,
+                                     raw_ostream &O);
+
+    void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O);
+    void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O);
+    void printConstantPool(const MachineInstr *MI, int OpNo, raw_ostream &O);
+
+    static const char *getRegisterName(unsigned RegNo);
+
+#if 0
+    void EmitStartOfAsmFile(Module &M);
+#endif
+  };
+
+} // end of llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
new file mode 100644
index 000000000000..8597f11ddde7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -0,0 +1,252 @@
+//===-- HexagonCFGOptimizer.cpp - CFG optimizations -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon_cfg" +#include "Hexagon.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +namespace llvm { + void initializeHexagonCFGOptimizerPass(PassRegistry&); +} + + +namespace { + +class HexagonCFGOptimizer : public MachineFunctionPass { + +private: + const HexagonTargetMachine& QTM; + const HexagonSubtarget &QST; + + void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*); + + public: + static char ID; + HexagonCFGOptimizer(const HexagonTargetMachine& TM) + : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) { + initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry()); + } + + const char *getPassName() const { + return "Hexagon CFG Optimizer"; + } + bool runOnMachineFunction(MachineFunction &Fn); +}; + + +char HexagonCFGOptimizer::ID = 0; + +static bool IsConditionalBranch(int Opc) { + return (Opc == Hexagon::JMP_t) || (Opc == Hexagon::JMP_f) + || (Opc == Hexagon::JMP_tnew_t) || (Opc == Hexagon::JMP_fnew_t); +} + + +static bool IsUnconditionalJump(int Opc) { + return (Opc == Hexagon::JMP); +} + + +void +HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, + MachineBasicBlock* NewTarget) { + const HexagonInstrInfo *QII = QTM.getInstrInfo(); + int NewOpcode = 0; + switch(MI->getOpcode()) { + case Hexagon::JMP_t: + NewOpcode = Hexagon::JMP_f; + break; + + case Hexagon::JMP_f: + NewOpcode = Hexagon::JMP_t; + break; + + case Hexagon::JMP_tnew_t: + NewOpcode = Hexagon::JMP_fnew_t; + break; + + case Hexagon::JMP_fnew_t: + NewOpcode = Hexagon::JMP_tnew_t; + break; + + default: + llvm_unreachable("Cannot handle this case"); + } + + MI->setDesc(QII->get(NewOpcode)); + MI->getOperand(1).setMBB(NewTarget); +} + + +bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock* MBB = MBBb; + + // Traverse the basic block. + MachineBasicBlock::iterator MII = MBB->getFirstTerminator(); + if (MII != MBB->end()) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (IsConditionalBranch(Opc)) { + + // + // (Case 1) Transform the code if the following condition occurs: + // BB1: if (p0) jump BB3 + // ...falls-through to BB2 ... + // BB2: jump BB4 + // ...next block in layout is BB3... + // BB3: ... + // + // Transform this to: + // BB1: if (!p0) jump BB4 + // Remove BB2 + // BB3: ... + // + // (Case 2) A variation occurs when BB3 contains a JMP to BB4: + // BB1: if (p0) jump BB3 + // ...falls-through to BB2 ... + // BB2: jump BB4 + // ...other basic blocks ... + // BB4: + // ...not a fall-thru + // BB3: ... + // jump BB4 + // + // Transform this to: + // BB1: if (!p0) jump BB4 + // Remove BB2 + // BB3: ... + // BB4: ... 
+ // + unsigned NumSuccs = MBB->succ_size(); + MachineBasicBlock::succ_iterator SI = MBB->succ_begin(); + MachineBasicBlock* FirstSucc = *SI; + MachineBasicBlock* SecondSucc = *(++SI); + MachineBasicBlock* LayoutSucc = NULL; + MachineBasicBlock* JumpAroundTarget = NULL; + + if (MBB->isLayoutSuccessor(FirstSucc)) { + LayoutSucc = FirstSucc; + JumpAroundTarget = SecondSucc; + } else if (MBB->isLayoutSuccessor(SecondSucc)) { + LayoutSucc = SecondSucc; + JumpAroundTarget = FirstSucc; + } else { + // Odd case...cannot handle. + } + + // The target of the unconditional branch must be JumpAroundTarget. + // TODO: If not, we should not invert the unconditional branch. + MachineBasicBlock* CondBranchTarget = NULL; + if ((MI->getOpcode() == Hexagon::JMP_t) || + (MI->getOpcode() == Hexagon::JMP_f)) { + CondBranchTarget = MI->getOperand(1).getMBB(); + } + + if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) { + continue; + } + + if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) { + + // Ensure that BB2 has one instruction -- an unconditional jump. + if ((LayoutSucc->size() == 1) && + IsUnconditionalJump(LayoutSucc->front().getOpcode())) { + MachineBasicBlock* UncondTarget = + LayoutSucc->front().getOperand(0).getMBB(); + // Check if the layout successor of BB2 is BB3. + bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget); + bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) && + JumpAroundTarget->size() >= 1 && + IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) && + JumpAroundTarget->pred_size() == 1 && + JumpAroundTarget->succ_size() == 1; + + if (case1 || case2) { + InvertAndChangeJumpTarget(MI, UncondTarget); + MBB->removeSuccessor(JumpAroundTarget); + MBB->addSuccessor(UncondTarget); + + // Remove the unconditional branch in LayoutSucc. + LayoutSucc->erase(LayoutSucc->begin()); + LayoutSucc->removeSuccessor(UncondTarget); + LayoutSucc->addSuccessor(JumpAroundTarget); + + // This code performs the conversion for case 2, which moves + // the block to the fall-thru case (BB3 in the code above). + if (case2 && !case1) { + JumpAroundTarget->moveAfter(LayoutSucc); + // only move a block if it doesn't have a fall-thru. otherwise + // the CFG will be incorrect. + if (!UncondTarget->canFallThrough()) { + UncondTarget->moveAfter(JumpAroundTarget); + } + } + + // + // Correct live-in information. Is used by post-RA scheduler + // The live-in to LayoutSucc is now all values live-in to + // JumpAroundTarget. 
+ // + std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(), + LayoutSucc->livein_end()); + std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(), + JumpAroundTarget->livein_end()); + for (unsigned i = 0; i < OrigLiveIn.size(); ++i) { + LayoutSucc->removeLiveIn(OrigLiveIn[i]); + } + for (unsigned i = 0; i < NewLiveIn.size(); ++i) { + LayoutSucc->addLiveIn(NewLiveIn[i]); + } + } + } + } + } + } + } + return true; +} +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +static void initializePassOnce(PassRegistry &Registry) { + PassInfo *PI = new PassInfo("Hexagon CFG Optimizer", "hexagon-cfg", + &HexagonCFGOptimizer::ID, 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass *llvm::createHexagonCFGOptimizer(const HexagonTargetMachine &TM) { + return new HexagonCFGOptimizer(TM); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td new file mode 100644 index 000000000000..e61b2a7a58ac --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td @@ -0,0 +1,35 @@ +//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the Hexagon architectures. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Return Value Calling Conventions +//===----------------------------------------------------------------------===// + +// Hexagon 32-bit C return-value convention. +def RetCC_Hexagon32 : CallingConv<[ + CCIfType<[i32, f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, + CCIfType<[i64, f64], CCAssignToReg<[D0, D1, D2]>>, + + // Alternatively, they are assigned to the stack in 4-byte aligned units. + CCAssignToStack<4, 4> +]>; + +// Hexagon 32-bit C Calling convention. +def CC_Hexagon32 : CallingConv<[ + // All arguments get passed in integer registers if there is space. + CCIfType<[f32, i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, + CCIfType<[f64, i64], CCAssignToReg<[D0, D1, D2]>>, + + // Alternatively, they are assigned to the stack in 4-byte aligned units. + CCAssignToStack<4, 4> +]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp new file mode 100644 index 000000000000..f5f958c101b1 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp @@ -0,0 +1,204 @@ +//===-- llvm/CallingConvLower.cpp - Calling Convention lowering -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Hexagon_CCState class, used for lowering and +// implementing calling conventions. 
Adapted from the machine independent +// version of the class (CCState) but this handles calls to varargs functions +// +//===----------------------------------------------------------------------===// + +#include "HexagonCallingConvLower.h" +#include "Hexagon.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg, + const TargetMachine &tm, + SmallVectorImpl<CCValAssign> &locs, + LLVMContext &c) + : CallingConv(CC), IsVarArg(isVarArg), TM(tm), Locs(locs), Context(c) { + // No stack is used. + StackOffset = 0; + + UsedRegs.resize((TM.getRegisterInfo()->getNumRegs()+31)/32); +} + +// HandleByVal - Allocate a stack slot large enough to pass an argument by +// value. The size and alignment information of the argument is encoded in its +// parameter attribute. +void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, + ISD::ArgFlagsTy ArgFlags) { + unsigned Align = ArgFlags.getByValAlign(); + unsigned Size = ArgFlags.getByValSize(); + if (MinSize > (int)Size) + Size = MinSize; + if (MinAlign > (int)Align) + Align = MinAlign; + unsigned Offset = AllocateStack(Size, Align); + + addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset, + LocVT.getSimpleVT(), LocInfo)); +} + +/// MarkAllocated - Mark a register and all of its aliases as allocated. +void Hexagon_CCState::MarkAllocated(unsigned Reg) { + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) + UsedRegs[*AI/32] |= 1 << (*AI&31); +} + +/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, +/// incorporating info about the formals into this state. +void +Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> + &Ins, + Hexagon_CCAssignFn Fn, + unsigned SretValueInRegs) { + unsigned NumArgs = Ins.size(); + unsigned i = 0; + + // If the function returns a small struct in registers, skip + // over the first (dummy) argument. + if (SretValueInRegs != 0) { + ++i; + } + + + for (; i != NumArgs; ++i) { + EVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) { + dbgs() << "Formal argument #" << i << " has unhandled type " + << ArgVT.getEVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, +/// incorporating info about the result values into this state. +void +Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + Hexagon_CCAssignFn Fn, + unsigned SretValueInRegs) { + + // For Hexagon, Return small structures in registers. + if (SretValueInRegs != 0) { + if (SretValueInRegs <= 32) { + unsigned Reg = Hexagon::R0; + addLoc(CCValAssign::getReg(0, MVT::i32, Reg, MVT::i32, + CCValAssign::Full)); + return; + } + if (SretValueInRegs <= 64) { + unsigned Reg = Hexagon::D0; + addLoc(CCValAssign::getReg(0, MVT::i64, Reg, MVT::i64, + CCValAssign::Full)); + return; + } + } + + + // Determine which register each value should be copied into. 
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false)){ + dbgs() << "Return operand #" << i << " has unhandled type " + << VT.getEVTString() << "\n"; + abort(); + } + } +} + + +/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info +/// about the passed values into this state. +void +Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> + &Outs, + Hexagon_CCAssignFn Fn, + int NonVarArgsParams, + unsigned SretValueSize) { + unsigned NumOps = Outs.size(); + + unsigned i = 0; + // If the called function returns a small struct in registers, skip + // the first actual parameter. We do not want to pass a pointer to + // the stack location. + if (SretValueSize != 0) { + ++i; + } + + for (; i != NumOps; ++i) { + EVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, + NonVarArgsParams, i+1, false)) { + dbgs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallOperands - Same as above except it takes vectors of types +/// and argument flags. +void +Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + Hexagon_CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + EVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1, + false)) { + dbgs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, +/// incorporating info about the passed values into this state. +void +Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + Hexagon_CCAssignFn Fn, + unsigned SretValueInRegs) { + + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + EVT VT = Ins[i].VT; + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this, -1, -1, false)) { + dbgs() << "Call result #" << i << " has unhandled type " + << VT.getEVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Same as above except it's specialized for calls which +/// produce a single value. +void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this, -1, -1, + false)) { + dbgs() << "Call result has unhandled type " + << VT.getEVTString() << "\n"; + abort(); + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.h b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.h new file mode 100644 index 000000000000..33c83064f90b --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.h @@ -0,0 +1,188 @@ +//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Hexagon_CCState class, used for lowering +// and implementing calling conventions. 
Adapted from the target independent +// version but this handles calls to varargs functions +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H +#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" + +// +// Need to handle varargs. +// +namespace llvm { + class TargetRegisterInfo; + class TargetMachine; + class Hexagon_CCState; + class SDNode; + + +/// Hexagon_CCAssignFn - This function assigns a location for Val, updating +/// State to reflect the change. +typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State, + int NonVarArgsParams, + int CurrentParam, + bool ForceMem); + + +/// CCState - This class holds information needed while lowering arguments and +/// return values. It captures which registers are already assigned and which +/// stack slots are used. It provides accessors to allocate these values. +class Hexagon_CCState { + CallingConv::ID CallingConv; + bool IsVarArg; + const TargetMachine &TM; + SmallVectorImpl<CCValAssign> &Locs; + LLVMContext &Context; + + unsigned StackOffset; + SmallVector<uint32_t, 16> UsedRegs; +public: + Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM, + SmallVectorImpl<CCValAssign> &locs, LLVMContext &c); + + void addLoc(const CCValAssign &V) { + Locs.push_back(V); + } + + LLVMContext &getContext() const { return Context; } + const TargetMachine &getTarget() const { return TM; } + unsigned getCallingConv() const { return CallingConv; } + bool isVarArg() const { return IsVarArg; } + + unsigned getNextStackOffset() const { return StackOffset; } + + /// isAllocated - Return true if the specified register (or an alias) is + /// allocated. + bool isAllocated(unsigned Reg) const { + return UsedRegs[Reg/32] & (1 << (Reg&31)); + } + + /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, + /// incorporating info about the formals into this state. + void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); + + /// AnalyzeReturn - Analyze the returned values of an ISD::RET node, + /// incorporating info about the result values into this state. + void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); + + /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info + /// about the passed values into this state. + void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + Hexagon_CCAssignFn Fn, int NonVarArgsParams, + unsigned SretValueSize); + + /// AnalyzeCallOperands - Same as above except it takes vectors of types + /// and argument flags. + void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + Hexagon_CCAssignFn Fn); + + /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, + /// incorporating info about the passed values into this state. + void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); + + /// AnalyzeCallResult - Same as above except it's specialized for calls which + /// produce a single value. 
+ void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn); + + /// getFirstUnallocated - Return the first unallocated register in the set, or + /// NumRegs if they are all allocated. + unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const { + for (unsigned i = 0; i != NumRegs; ++i) + if (!isAllocated(Regs[i])) + return i; + return NumRegs; + } + + /// AllocateReg - Attempt to allocate one register. If it is not available, + /// return zero. Otherwise, return the register, marking it and any aliases + /// as allocated. + unsigned AllocateReg(unsigned Reg) { + if (isAllocated(Reg)) return 0; + MarkAllocated(Reg); + return Reg; + } + + /// Version of AllocateReg with extra register to be shadowed. + unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) { + if (isAllocated(Reg)) return 0; + MarkAllocated(Reg); + MarkAllocated(ShadowReg); + return Reg; + } + + /// AllocateReg - Attempt to allocate one of the specified registers. If none + /// are available, return zero. Otherwise, return the first one available, + /// marking it and any aliases as allocated. + unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) { + unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs); + if (FirstUnalloc == NumRegs) + return 0; // Didn't find the reg. + + // Mark the register and any aliases as allocated. + unsigned Reg = Regs[FirstUnalloc]; + MarkAllocated(Reg); + return Reg; + } + + /// Version of AllocateReg with list of registers to be shadowed. + unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs, + unsigned NumRegs) { + unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs); + if (FirstUnalloc == NumRegs) + return 0; // Didn't find the reg. + + // Mark the register and any aliases as allocated. + unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc]; + MarkAllocated(Reg); + MarkAllocated(ShadowReg); + return Reg; + } + + /// AllocateStack - Allocate a chunk of stack space with the specified size + /// and alignment. + unsigned AllocateStack(unsigned Size, unsigned Align) { + assert(Align && ((Align-1) & Align) == 0); // Align is power of 2. + StackOffset = ((StackOffset + Align-1) & ~(Align-1)); + unsigned Result = StackOffset; + StackOffset += Size; + return Result; + } + + // HandleByVal - Allocate a stack slot large enough to pass an argument by + // value. The size and alignment information of the argument is encoded in its + // parameter attribute. + void HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags); + +private: + /// MarkAllocated - Mark a register and all of its aliases as allocated. + void MarkAllocated(unsigned Reg); +}; + + + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp new file mode 100644 index 000000000000..dc440cb1356c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -0,0 +1,677 @@ +//===------- HexagonCopyToCombine.cpp - Hexagon Copy-To-Combine Pass ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass replaces transfer instructions by combine instructions. +// We walk along a basic block and look for two combinable instructions and try +// to move them together. 
If we can move them next to each other we do so and +// replace them with a combine instruction. +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "hexagon-copy-combine" + +#include "llvm/PassSupport.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonMachineFunctionInfo.h" + +using namespace llvm; + +static +cl::opt<bool> IsCombinesDisabled("disable-merge-into-combines", + cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Disable merging into combines")); +static +cl::opt<unsigned> +MaxNumOfInstsBetweenNewValueStoreAndTFR("max-num-inst-between-tfr-and-nv-store", + cl::Hidden, cl::init(4), + cl::desc("Maximum distance between a tfr feeding a store we " + "consider the store still to be newifiable")); + +namespace llvm { + void initializeHexagonCopyToCombinePass(PassRegistry&); +} + + +namespace { + +class HexagonCopyToCombine : public MachineFunctionPass { + const HexagonInstrInfo *TII; + const TargetRegisterInfo *TRI; + bool ShouldCombineAggressively; + + DenseSet<MachineInstr *> PotentiallyNewifiableTFR; +public: + static char ID; + + HexagonCopyToCombine() : MachineFunctionPass(ID) { + initializeHexagonCopyToCombinePass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const { + return "Hexagon Copy-To-Combine Pass"; + } + + virtual bool runOnMachineFunction(MachineFunction &Fn); + +private: + MachineInstr *findPairable(MachineInstr *I1, bool &DoInsertAtI1); + + void findPotentialNewifiableTFRs(MachineBasicBlock &); + + void combine(MachineInstr *I1, MachineInstr *I2, + MachineBasicBlock::iterator &MI, bool DoInsertAtI1); + + bool isSafeToMoveTogether(MachineInstr *I1, MachineInstr *I2, + unsigned I1DestReg, unsigned I2DestReg, + bool &DoInsertAtI1); + + void emitCombineRR(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineRI(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineIR(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineII(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); +}; + +} // End anonymous namespace. + +char HexagonCopyToCombine::ID = 0; + +INITIALIZE_PASS(HexagonCopyToCombine, "hexagon-copy-combine", + "Hexagon Copy-To-Combine Pass", false, false) + +static bool isCombinableInstType(MachineInstr *MI, + const HexagonInstrInfo *TII, + bool ShouldCombineAggressively) { + switch(MI->getOpcode()) { + case Hexagon::TFR: { + // A COPY instruction can be combined if its arguments are IntRegs (32bit). 
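+    // For illustration: "%R3 = TFR %R7" qualifies; a transfer whose source or
+    // destination is a predicate or double register does not.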
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isReg()); + + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + return Hexagon::IntRegsRegClass.contains(DestReg) && + Hexagon::IntRegsRegClass.contains(SrcReg); + } + + case Hexagon::TFRI: { + // A transfer-immediate can be combined if its argument is a signed 8bit + // value. + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + unsigned DestReg = MI->getOperand(0).getReg(); + + // Only combine constant extended TFRI if we are in aggressive mode. + return Hexagon::IntRegsRegClass.contains(DestReg) && + (ShouldCombineAggressively || isInt<8>(MI->getOperand(1).getImm())); + } + + case Hexagon::TFRI_V4: { + if (!ShouldCombineAggressively) + return false; + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isGlobal()); + + // Ensure that TargetFlags are MO_NO_FLAG for a global. This is a + // workaround for an ABI bug that prevents GOT relocations on combine + // instructions + if (MI->getOperand(1).getTargetFlags() != HexagonII::MO_NO_FLAG) + return false; + + unsigned DestReg = MI->getOperand(0).getReg(); + return Hexagon::IntRegsRegClass.contains(DestReg); + } + + default: + break; + } + + return false; +} + +static bool isGreaterThan8BitTFRI(MachineInstr *I) { + return I->getOpcode() == Hexagon::TFRI && + !isInt<8>(I->getOperand(1).getImm()); +} +static bool isGreaterThan6BitTFRI(MachineInstr *I) { + return I->getOpcode() == Hexagon::TFRI && + !isUInt<6>(I->getOperand(1).getImm()); +} + +/// areCombinableOperations - Returns true if the two instruction can be merge +/// into a combine (ignoring register constraints). +static bool areCombinableOperations(const TargetRegisterInfo *TRI, + MachineInstr *HighRegInst, + MachineInstr *LowRegInst) { + assert((HighRegInst->getOpcode() == Hexagon::TFR || + HighRegInst->getOpcode() == Hexagon::TFRI || + HighRegInst->getOpcode() == Hexagon::TFRI_V4) && + (LowRegInst->getOpcode() == Hexagon::TFR || + LowRegInst->getOpcode() == Hexagon::TFRI || + LowRegInst->getOpcode() == Hexagon::TFRI_V4) && + "Assume individual instructions are of a combinable type"); + + const HexagonRegisterInfo *QRI = + static_cast<const HexagonRegisterInfo *>(TRI); + + // V4 added some combine variations (mixed immediate and register source + // operands), if we are on < V4 we can only combine 2 register-to-register + // moves and 2 immediate-to-register moves. We also don't have + // constant-extenders. + if (!QRI->Subtarget.hasV4TOps()) + return HighRegInst->getOpcode() == LowRegInst->getOpcode() && + !isGreaterThan8BitTFRI(HighRegInst) && + !isGreaterThan6BitTFRI(LowRegInst); + + // There is no combine of two constant extended values. + if ((HighRegInst->getOpcode() == Hexagon::TFRI_V4 || + isGreaterThan8BitTFRI(HighRegInst)) && + (LowRegInst->getOpcode() == Hexagon::TFRI_V4 || + isGreaterThan6BitTFRI(LowRegInst))) + return false; + + return true; +} + +static bool isEvenReg(unsigned Reg) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + Hexagon::IntRegsRegClass.contains(Reg)); + return (Reg - Hexagon::R0) % 2 == 0; +} + +static void removeKillInfo(MachineInstr *MI, unsigned RegNotKilled) { + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + MachineOperand &Op = MI->getOperand(I); + if (!Op.isReg() || Op.getReg() != RegNotKilled || !Op.isKill()) + continue; + Op.setIsKill(false); + } +} + +/// isUnsafeToMoveAcross - Returns true if it is unsafe to move a copy +/// instruction from \p UseReg to \p DestReg over the instruction \p I. 
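/// For example (registers invented): a copy "%R0 = %R1" must not move down
/// across "%R1 = add(%R1, #1)", which redefines its source, nor across any
/// instruction that reads or writes %R0.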
+static bool isUnsafeToMoveAcross(MachineInstr *I, unsigned UseReg,
+                                 unsigned DestReg,
+                                 const TargetRegisterInfo *TRI) {
+  return (UseReg && (I->modifiesRegister(UseReg, TRI))) ||
+          I->modifiesRegister(DestReg, TRI) ||
+          I->readsRegister(DestReg, TRI) ||
+          I->hasUnmodeledSideEffects() ||
+          I->isInlineAsm() || I->isDebugValue();
+}
+
+/// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2
+/// such that the two instructions can be paired in a combine.
+bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1,
+                                                MachineInstr *I2,
+                                                unsigned I1DestReg,
+                                                unsigned I2DestReg,
+                                                bool &DoInsertAtI1) {
+
+  bool IsImmUseReg = I2->getOperand(1).isImm() || I2->getOperand(1).isGlobal();
+  unsigned I2UseReg = IsImmUseReg ? 0 : I2->getOperand(1).getReg();
+
+  // It is not safe to move I1 and I2 into one combine if I2 has a true
+  // dependence on I1.
+  if (I2UseReg && I1->modifiesRegister(I2UseReg, TRI))
+    return false;
+
+  bool isSafe = true;
+
+  // First try to move I2 towards I1.
+  {
+    // A reverse_iterator instantiated like below starts before I2 and I1,
+    // respectively.
+    // Look at instructions I in between I2 and (excluding) I1.
+    MachineBasicBlock::reverse_iterator I(I2),
+      End = --(MachineBasicBlock::reverse_iterator(I1));
+    // At O3 we got better results (dhrystone!) by being more conservative.
+    if (!ShouldCombineAggressively)
+      End = MachineBasicBlock::reverse_iterator(I1);
+    // If I2 kills its operand and we move I2 over an instruction that also
+    // uses I2's use reg, we need to modify that (first) instruction to now
+    // kill this reg.
+    unsigned KilledOperand = 0;
+    if (I2->killsRegister(I2UseReg))
+      KilledOperand = I2UseReg;
+    MachineInstr *KillingInstr = 0;
+
+    for (; I != End; ++I) {
+      // If the intervening instruction I:
+      //   * modifies I2's use reg
+      //   * modifies I2's def reg
+      //   * reads I2's def reg
+      //   * or has unmodelled side effects
+      // we can't move I2 across it.
+      if (isUnsafeToMoveAcross(&*I, I2UseReg, I2DestReg, TRI)) {
+        isSafe = false;
+        break;
+      }
+
+      // Update first use of the killed operand.
+      if (!KillingInstr && KilledOperand &&
+          I->readsRegister(KilledOperand, TRI))
+        KillingInstr = &*I;
+    }
+    if (isSafe) {
+      // Update the intermediate instruction with the kill flag.
+      if (KillingInstr) {
+        bool Added = KillingInstr->addRegisterKilled(KilledOperand, TRI, true);
+        (void)Added; // Suppress compiler warning.
+        assert(Added && "Must successfully update kill flag");
+        removeKillInfo(I2, KilledOperand);
+      }
+      DoInsertAtI1 = true;
+      return true;
+    }
+  }
+
+  // Try to move I1 towards I2.
+  {
+    // Look at instructions I in between I1 and (excluding) I2.
+    MachineBasicBlock::iterator I(I1), End(I2);
+    // At O3 we got better results (dhrystone) by being more conservative here.
+    if (!ShouldCombineAggressively)
+      End = llvm::next(MachineBasicBlock::iterator(I2));
+    IsImmUseReg = I1->getOperand(1).isImm() || I1->getOperand(1).isGlobal();
+    unsigned I1UseReg = IsImmUseReg ? 0 : I1->getOperand(1).getReg();
+    // Track killed operands. If we move across an instruction that kills our
+    // operand, we need to update the kill information on the moved I1: it now
+    // kills the operand.
+    MachineInstr *KillingInstr = 0;
+    unsigned KilledOperand = 0;
+
+    while (++I != End) {
+      // If the intervening instruction I:
+      //   * modifies I1's use reg
+      //   * modifies I1's def reg
+      //   * reads I1's def reg
+      //   * or has unmodelled side effects
+      //   * or has a killed aliased register use of I1's use reg, e.g.
+      //       %D4<def> = TFRI64 16
+      //       %R6<def> = TFR %R9
+      //       %R8<def> = KILL %R8, %D4<imp-use,kill>
+      //     If we wanted to move R6 = across the KILL instruction we would
+      //     have to remove the %D4<imp-use,kill> operand. LLVM has no API to
+      //     remove a kill flag for a register (a removeRegisterKilled()
+      //     analogous to addRegisterKilled) that handles aliased registers
+      //     correctly, so for now we are conservative and disallow the move.
+      // we can't move I1 across it.
+      if (isUnsafeToMoveAcross(I, I1UseReg, I1DestReg, TRI) ||
+          // Check for an aliased register kill. Bail out if we see one.
+          (!I->killsRegister(I1UseReg) && I->killsRegister(I1UseReg, TRI)))
+        return false;
+
+      // Check for an exact kill (registers match).
+      if (I1UseReg && I->killsRegister(I1UseReg)) {
+        assert(KillingInstr == 0 && "Should only see one killing instruction");
+        KilledOperand = I1UseReg;
+        KillingInstr = &*I;
+      }
+    }
+    if (KillingInstr) {
+      removeKillInfo(KillingInstr, KilledOperand);
+      // Update I1 to set the kill flag. This flag will later be picked up by
+      // the new COMBINE instruction.
+      bool Added = I1->addRegisterKilled(KilledOperand, TRI);
+      (void)Added; // Suppress compiler warning.
+      assert(Added && "Must successfully update kill flag");
+    }
+    DoInsertAtI1 = false;
+  }
+
+  return true;
+}
+
+/// findPotentialNewifiableTFRs - Finds transfers that feed stores that could
+/// be newified. (A use of a 64-bit register definition cannot be newified.)
+void
+HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
+  DenseMap<unsigned, MachineInstr *> LastDef;
+  for (MachineBasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+    MachineInstr *MI = I;
+    // Mark TFRs that feed a potential new value store as such.
+    if (TII->mayBeNewStore(MI)) {
+      // Look for uses of TFR instructions.
+      for (unsigned OpdIdx = 0, OpdE = MI->getNumOperands(); OpdIdx != OpdE;
+           ++OpdIdx) {
+        MachineOperand &Op = MI->getOperand(OpdIdx);
+
+        // Skip over anything except register uses.
+        if (!Op.isReg() || !Op.isUse() || !Op.getReg())
+          continue;
+
+        // Look for the defining instruction.
+        unsigned Reg = Op.getReg();
+        MachineInstr *DefInst = LastDef[Reg];
+        if (!DefInst)
+          continue;
+        if (!isCombinableInstType(DefInst, TII, ShouldCombineAggressively))
+          continue;
+
+        // Only close newifiable stores should influence the decision.
+        MachineBasicBlock::iterator It(DefInst);
+        unsigned NumInstsToDef = 0;
+        while (&*It++ != MI)
+          ++NumInstsToDef;
+
+        if (NumInstsToDef > MaxNumOfInstsBetweenNewValueStoreAndTFR)
+          continue;
+
+        PotentiallyNewifiableTFR.insert(DefInst);
+      }
+      // Skip to next instruction.
+      continue;
+    }
+
+    // Put instructions that last defined integer or double registers into the
+    // map.
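+    // E.g. (a sketch): after an instruction defining the double register D1,
+    // both of its subregisters R2 and R3 map to that instruction in the loop
+    // below, so later reads of either half can find their defining
+    // instruction.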
+  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+    MachineOperand &Op = MI->getOperand(I);
+    if (!Op.isReg() || !Op.isDef() || !Op.getReg())
+      continue;
+    unsigned Reg = Op.getReg();
+    if (Hexagon::DoubleRegsRegClass.contains(Reg)) {
+      for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+        LastDef[*SubRegs] = MI;
+      }
+    } else if (Hexagon::IntRegsRegClass.contains(Reg))
+      LastDef[Reg] = MI;
+  }
+}
+
+bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
+
+  if (IsCombinesDisabled) return false;
+
+  bool HasChanged = false;
+
+  // Get target info.
+  TRI = MF.getTarget().getRegisterInfo();
+  TII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
+
+  // Combine aggressively (for code size).
+  ShouldCombineAggressively =
+    MF.getTarget().getOptLevel() <= CodeGenOpt::Default;
+
+  // Traverse basic blocks.
+  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+       ++BI) {
+    PotentiallyNewifiableTFR.clear();
+    findPotentialNewifiableTFRs(*BI);
+
+    // Traverse instructions in basic block.
+    for (MachineBasicBlock::iterator MI = BI->begin(), End = BI->end();
+         MI != End;) {
+      MachineInstr *I1 = MI++;
+      // Don't combine a TFR whose user could be newified (instructions that
+      // define double registers cannot be newified - Programmer's Ref Manual
+      // 5.4.2 New-value stores).
+      if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I1))
+        continue;
+
+      // Ignore instructions that are not combinable.
+      if (!isCombinableInstType(I1, TII, ShouldCombineAggressively))
+        continue;
+
+      // Find a second instruction that can be merged into a combine
+      // instruction.
+      bool DoInsertAtI1 = false;
+      MachineInstr *I2 = findPairable(I1, DoInsertAtI1);
+      if (I2) {
+        HasChanged = true;
+        combine(I1, I2, MI, DoInsertAtI1);
+      }
+    }
+  }
+
+  return HasChanged;
+}
+
+/// findPairable - Returns an instruction that can be merged with \p I1 into a
+/// COMBINE instruction or 0 if no such instruction can be found. Returns true
+/// in \p DoInsertAtI1 if the combine must be inserted at instruction \p I1, or
+/// false if the combine must be inserted at the returned instruction.
+MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr *I1,
+                                                 bool &DoInsertAtI1) {
+  MachineBasicBlock::iterator I2 = llvm::next(MachineBasicBlock::iterator(I1));
+  unsigned I1DestReg = I1->getOperand(0).getReg();
+
+  for (MachineBasicBlock::iterator End = I1->getParent()->end(); I2 != End;
+       ++I2) {
+    // Bail out early if we see a second definition of I1DestReg.
+    if (I2->modifiesRegister(I1DestReg, TRI))
+      break;
+
+    // Ignore non-combinable instructions.
+    if (!isCombinableInstType(I2, TII, ShouldCombineAggressively))
+      continue;
+
+    // Don't combine a TFR whose user could be newified.
+    if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I2))
+      continue;
+
+    unsigned I2DestReg = I2->getOperand(0).getReg();
+
+    // Check that registers are adjacent and that the first destination
+    // register is even.
+    bool IsI1LowReg = (I2DestReg - I1DestReg) == 1;
+    bool IsI2LowReg = (I1DestReg - I2DestReg) == 1;
+    unsigned FirstRegIndex = IsI1LowReg ? I1DestReg : I2DestReg;
+    if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex))
+      continue;
+
+    // Check that the two instructions are combinable. V4 allows more
+    // instructions to be merged into a combine.
+    // The order matters because in a TFRI we can encode an int8 as the
+    // hi reg operand but only a uint6 as the low reg operand.
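+    // Sketch with invented values: "r1:0 = combine(#-3, #20)" encodes
+    // directly (the hi immediate fits int8, the lo immediate fits uint6),
+    // while "r1:0 = combine(#20, #-3)" puts -3 in the lo slot, which is not
+    // encodable before V4 and needs a constant-extended COMBINE_iI_V4 on V4.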
+ if ((IsI2LowReg && !areCombinableOperations(TRI, I1, I2)) || + (IsI1LowReg && !areCombinableOperations(TRI, I2, I1))) + break; + + if (isSafeToMoveTogether(I1, I2, I1DestReg, I2DestReg, + DoInsertAtI1)) + return I2; + + // Not safe. Stop searching. + break; + } + return 0; +} + +void HexagonCopyToCombine::combine(MachineInstr *I1, MachineInstr *I2, + MachineBasicBlock::iterator &MI, + bool DoInsertAtI1) { + // We are going to delete I2. If MI points to I2 advance it to the next + // instruction. + if ((MachineInstr *)MI == I2) ++MI; + + // Figure out whether I1 or I2 goes into the lowreg part. + unsigned I1DestReg = I1->getOperand(0).getReg(); + unsigned I2DestReg = I2->getOperand(0).getReg(); + bool IsI1Loreg = (I2DestReg - I1DestReg) == 1; + unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg; + + // Get the double word register. + unsigned DoubleRegDest = + TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg, + &Hexagon::DoubleRegsRegClass); + assert(DoubleRegDest != 0 && "Expect a valid register"); + + + // Setup source operands. + MachineOperand &LoOperand = IsI1Loreg ? I1->getOperand(1) : + I2->getOperand(1); + MachineOperand &HiOperand = IsI1Loreg ? I2->getOperand(1) : + I1->getOperand(1); + + // Figure out which source is a register and which a constant. + bool IsHiReg = HiOperand.isReg(); + bool IsLoReg = LoOperand.isReg(); + + MachineBasicBlock::iterator InsertPt(DoInsertAtI1 ? I1 : I2); + // Emit combine. + if (IsHiReg && IsLoReg) + emitCombineRR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsHiReg) + emitCombineRI(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsLoReg) + emitCombineIR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else + emitCombineII(InsertPt, DoubleRegDest, HiOperand, LoOperand); + + I1->eraseFromParent(); + I2->eraseFromParent(); +} + +void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle globals. + if (HiOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_iI_V4), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Handle constant extended immediates. + if (!isInt<8>(HiOperand.getImm())) { + assert(isInt<8>(LoOperand.getImm())); + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } + + if (!isUInt<6>(LoOperand.getImm())) { + assert(isInt<8>(HiOperand.getImm())); + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_iI_V4), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } + + // Insert new combine instruction. 
+ // DoubleRegDest = combine #HiImm, #LoImm + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); +} + +void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned LoReg = LoOperand.getReg(); + unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle global. + if (HiOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ir_V4), DoubleDestReg) + .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Insert new combine instruction. + // DoubleRegDest = combine #HiImm, LoReg + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ir_V4), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addReg(LoReg, LoRegKillFlag); +} + +void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); + unsigned HiReg = HiOperand.getReg(); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle global. + if (LoOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rI_V4), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Insert new combine instruction. + // DoubleRegDest = combine HiReg, #LoImm + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rI_V4), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addImm(LoOperand.getImm()); +} + +void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); + unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); + unsigned LoReg = LoOperand.getReg(); + unsigned HiReg = HiOperand.getReg(); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Insert new combine instruction. + // DoubleRegDest = combine HiReg, LoReg + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rr), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addReg(LoReg, LoRegKillFlag); +} + +FunctionPass *llvm::createHexagonCopyToCombine() { + return new HexagonCopyToCombine(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp new file mode 100644 index 000000000000..8a5991fbc4f2 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -0,0 +1,201 @@ +//===-- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// The Hexagon processor has no instructions that load or store predicate +// registers directly. 
So, when these registers must be spilled, a
+// general-purpose register must be found and the value copied between it and
+// the predicate register. This code currently does not use the register
+// scavenger mechanism available in the allocator. There are two registers
+// reserved to allow spilling/restoring predicate registers. One is used to
+// hold the predicate value. The other is used when stack frame offsets are
+// too large.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+
+namespace llvm {
+  void initializeHexagonExpandPredSpillCodePass(PassRegistry&);
+}
+
+
+namespace {
+
+class HexagonExpandPredSpillCode : public MachineFunctionPass {
+    const HexagonTargetMachine& QTM;
+    const HexagonSubtarget &QST;
+
+ public:
+    static char ID;
+    HexagonExpandPredSpillCode(const HexagonTargetMachine& TM) :
+      MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+      PassRegistry &Registry = *PassRegistry::getPassRegistry();
+      initializeHexagonExpandPredSpillCodePass(Registry);
+    }
+
+    const char *getPassName() const {
+      return "Hexagon Expand Predicate Spill Code";
+    }
+    bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonExpandPredSpillCode::ID = 0;
+
+
+bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
+
+  const HexagonInstrInfo *TII = QTM.getInstrInfo();
+
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+       MBBb != MBBe; ++MBBb) {
+    MachineBasicBlock* MBB = MBBb;
+    // Traverse the basic block.
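+    // As a sketch of the expansion performed below, a predicate spill
+    //   STriw_pred [r30], #Offset, p0
+    // becomes, when the offset is directly encodable,
+    //   HEXAGON_RESERVED_REG_2 = p0                     (TFR_RsPd)
+    //   memw(r30 + #Offset) = HEXAGON_RESERVED_REG_2    (STriw_indexed)
+    // with the offset materialized through HEXAGON_RESERVED_REG_1 when it is
+    // out of range.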
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + ++MII) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (Opc == Hexagon::STriw_pred) { + // STriw_pred [R30], ofst, SrcReg; + unsigned FP = MI->getOperand(0).getReg(); + assert(FP == QTM.getRegisterInfo()->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); + assert(MI->getOperand(1).isImm() && "Not an offset"); + int Offset = MI->getOperand(1).getImm(); + int SrcReg = MI->getOperand(2).getReg(); + assert(Hexagon::PredRegsRegClass.contains(SrcReg) && + "Not a predicate register"); + if (!TII->isValidOffset(Hexagon::STriw_indexed, Offset)) { + if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::CONST32_Int_Real), + HEXAGON_RESERVED_REG_1).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr), + HEXAGON_RESERVED_REG_1) + .addReg(FP).addReg(HEXAGON_RESERVED_REG_1); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::STriw_indexed)) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0).addReg(HEXAGON_RESERVED_REG_2); + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri), + HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::STriw_indexed)) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0) + .addReg(HEXAGON_RESERVED_REG_2); + } + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::STriw_indexed)). + addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2); + } + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::LDriw_pred) { + // DstReg = LDriw_pred [R30], ofst. 
+ int DstReg = MI->getOperand(0).getReg(); + assert(Hexagon::PredRegsRegClass.contains(DstReg) && + "Not a predicate register"); + unsigned FP = MI->getOperand(1).getReg(); + assert(FP == QTM.getRegisterInfo()->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); + assert(MI->getOperand(2).isImm() && "Not an offset"); + int Offset = MI->getOperand(2).getImm(); + if (!TII->isValidOffset(Hexagon::LDriw, Offset)) { + if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::CONST32_Int_Real), + HEXAGON_RESERVED_REG_1).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr), + HEXAGON_RESERVED_REG_1) + .addReg(FP) + .addReg(HEXAGON_RESERVED_REG_1); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw), + HEXAGON_RESERVED_REG_2) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri), + HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw), + HEXAGON_RESERVED_REG_2) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw), + HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } + MII = MBB->erase(MI); + --MII; + } + } + } + + return true; +} + +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon Expand Predicate Spill Code"; + PassInfo *PI = new PassInfo(Name, "hexagon-spill-pred", + &HexagonExpandPredSpillCode::ID, + 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass* +llvm::createHexagonExpandPredSpillCode(const HexagonTargetMachine &TM) { + return new HexagonExpandPredSpillCode(TM); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp new file mode 100644 index 000000000000..240cc9566648 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -0,0 +1,183 @@ +//===---- HexagonFixupHwLoops.cpp - Fixup HW loops too far from LOOPn. ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// The loop start address in the LOOPn instruction is encoded as a distance +// from the LOOPn instruction itself. If the start address is too far from +// the LOOPn instruction, the loop needs to be set up manually, i.e. via +// direct transfers to SAn and LCn. +// This pass will identify and convert such LOOPn instructions to a proper +// form. 
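+//
+// As a sketch, an out-of-range
+//   loop0(start, #count)
+// is rewritten by convertLoopInstr below roughly as
+//   scratch = #count      (or the trip-count register is used directly)
+//   lc0 = scratch         (TFCR)
+//   scratch = ##start     (CONST32_Label)
+//   sa0 = scratch         (TFCR)
+// using a register obtained from the scavenger.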
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+namespace llvm {
+  void initializeHexagonFixupHwLoopsPass(PassRegistry&);
+}
+
+namespace {
+  struct HexagonFixupHwLoops : public MachineFunctionPass {
+  public:
+    static char ID;
+
+    HexagonFixupHwLoops() : MachineFunctionPass(ID) {
+      initializeHexagonFixupHwLoopsPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// \brief Maximum distance between the loop instr and the basic block.
+    /// Just an estimate.
+    static const unsigned MAX_LOOP_DISTANCE = 200;
+
+    /// \brief Check the offset between each loop instruction and
+    /// the loop basic block to determine if we can use the LOOP instruction
+    /// or if we need to set the LC/SA registers explicitly.
+    bool fixupLoopInstrs(MachineFunction &MF);
+
+    /// \brief Add the instructions to set the LC and SA registers explicitly.
+    void convertLoopInstr(MachineFunction &MF,
+                          MachineBasicBlock::iterator &MII,
+                          RegScavenger &RS);
+
+  };
+
+  char HexagonFixupHwLoops::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup",
+                "Hexagon Hardware Loops Fixup", false, false)
+
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+  return new HexagonFixupHwLoops();
+}
+
+
+/// \brief Returns true if the instruction is a hardware loop instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+  return MI->getOpcode() == Hexagon::LOOP0_r ||
+         MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = fixupLoopInstrs(MF);
+  return Changed;
+}
+
+
+/// \brief For Hexagon, if the loop label is too far from the
+/// loop instruction then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of each basic block from the start.
+/// The second pass checks all the loop instructions.
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+  // Offset of the current instruction from the start.
+  unsigned InstOffset = 0;
+  // Map each basic block to the offset of its first instruction.
+  DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+  // First pass - compute the offset of each basic block.
+  for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+       MBB != MBBe; ++MBB) {
+    BlockToInstOffset[MBB] = InstOffset;
+    InstOffset += (MBB->size() * 4);
+  }
+
+  // Second pass - check each loop instruction to see if it needs to
+  // be converted.
+  InstOffset = 0;
+  bool Changed = false;
+  RegScavenger RS;
+
+  // Loop over all the basic blocks.
+  for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+       MBB != MBBe; ++MBB) {
+    InstOffset = BlockToInstOffset[MBB];
+    RS.enterBasicBlock(MBB);
+
+    // Loop over all the instructions.
+    MachineBasicBlock::iterator MIE = MBB->end();
+    MachineBasicBlock::iterator MII = MBB->begin();
+    while (MII != MIE) {
+      if (isHardwareLoop(MII)) {
+        RS.forward(MII);
+        assert(MII->getOperand(0).isMBB() &&
+               "Expect a basic block as loop operand");
+        int Sub = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+        unsigned Dist = Sub > 0 ? Sub : -Sub;
+        if (Dist > MAX_LOOP_DISTANCE) {
+          // Convert to explicitly setting LC0 and SA0.
+          convertLoopInstr(MF, MII, RS);
+          MII = MBB->erase(MII);
+          Changed = true;
+        } else {
+          ++MII;
+        }
+      } else {
+        ++MII;
+      }
+      InstOffset += 4;
+    }
+  }
+
+  return Changed;
+}
+
+
+/// \brief Convert a loop instruction to a sequence of instructions that
+/// set the LC0 and SA0 registers explicitly.
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
+                                           MachineBasicBlock::iterator &MII,
+                                           RegScavenger &RS) {
+  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+  MachineBasicBlock *MBB = MII->getParent();
+  DebugLoc DL = MII->getDebugLoc();
+  unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
+
+  // First, set the LC0 with the trip count.
+  if (MII->getOperand(1).isReg()) {
+    // Trip count is a register.
+    BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+      .addReg(MII->getOperand(1).getReg());
+  } else {
+    // Trip count is an immediate.
+    BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
+      .addImm(MII->getOperand(1).getImm());
+    BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+      .addReg(Scratch);
+  }
+  // Then, set the SA0 with the loop start address.
+  BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
+    .addMBB(MII->getOperand(0).getMBB());
+  BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0)
+    .addReg(Scratch);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
new file mode 100644
index 000000000000..2b04f25dd677
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -0,0 +1,346 @@
+//===-- HexagonFrameLowering.cpp - Define frame lowering ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +// +//===----------------------------------------------------------------------===// + +#include "HexagonFrameLowering.h" +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; + +static cl::opt<bool> DisableDeallocRet( + "disable-hexagon-dealloc-ret", + cl::Hidden, + cl::desc("Disable Dealloc Return for Hexagon target")); + +/// determineFrameLayout - Determine the size of the frame and maximum call +/// frame size. +void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Get the number of bytes to allocate from the FrameInfo. + unsigned FrameSize = MFI->getStackSize(); + + // Get the alignments provided by the target. + unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + // Get the maximum call frame size of all the calls. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + + // If we have dynamic alloca then maxCallFrameSize needs to be aligned so + // that allocations will be aligned. + if (MFI->hasVarSizedObjects()) + maxCallFrameSize = RoundUpToAlignment(maxCallFrameSize, TargetAlign); + + // Update maximum call frame size. + MFI->setMaxCallFrameSize(maxCallFrameSize); + + // Include call frame size in total. + FrameSize += maxCallFrameSize; + + // Make sure the frame is aligned. + FrameSize = RoundUpToAlignment(FrameSize, TargetAlign); + + // Update frame info. + MFI->setStackSize(FrameSize); +} + + +void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + const HexagonRegisterInfo *QRI = + static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo()); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + determineFrameLayout(MF); + + // Get the number of bytes to allocate from the FrameInfo. + int NumBytes = (int) MFI->getStackSize(); + + // LLVM expects allocframe not to be the first instruction in the + // basic block. + MachineBasicBlock::iterator InsertPt = MBB.begin(); + + // + // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset. 
+  //
+  HexagonMachineFunctionInfo *FuncInfo =
+    MF.getInfo<HexagonMachineFunctionInfo>();
+  const std::vector<MachineInstr*>& AdjustRegs =
+    FuncInfo->getAllocaAdjustInsts();
+  for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(),
+         e = AdjustRegs.end();
+       i != e; ++i) {
+    MachineInstr* MI = *i;
+    assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) &&
+           "Expected adjust alloca node");
+
+    MachineOperand& MO = MI->getOperand(2);
+    assert(MO.isImm() && "Expected immediate");
+    MO.setImm(MFI->getMaxCallFrameSize());
+  }
+
+  //
+  // Only insert ALLOCFRAME if we need to.
+  //
+  if (hasFP(MF)) {
+    // Check for overflow.
+    // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+    const int ALLOCFRAME_MAX = 16384;
+    const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+    if (NumBytes >= ALLOCFRAME_MAX) {
+      // Emit allocframe(#0).
+      BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0);
+
+      // Subtract offset from frame pointer.
+      BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real),
+              HEXAGON_RESERVED_REG_1).addImm(NumBytes);
+      BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr),
+              QRI->getStackRegister()).
+        addReg(QRI->getStackRegister()).
+        addReg(HEXAGON_RESERVED_REG_1);
+    } else {
+      BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes);
+    }
+  }
+}
+// Returns true if MBB has a machine instruction that indicates a tail call
+// in the block.
+bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  unsigned RetOpcode = MBBI->getOpcode();
+
+  return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+}
+
+void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
+                                        MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  DebugLoc dl = MBBI->getDebugLoc();
+  //
+  // Only insert deallocframe if we need to. Also at -O0. See comment
+  // in emitPrologue above.
+  //
+  if (hasFP(MF) || MF.getTarget().getOptLevel() == CodeGenOpt::None) {
+    MachineBasicBlock::iterator MBBI = prior(MBB.end());
+    MachineBasicBlock::iterator MBBI_end = MBB.end();
+
+    const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+    // Handle EH_RETURN.
+    if (MBBI->getOpcode() == Hexagon::EH_RETURN_JMPR) {
+      assert(MBBI->getOperand(0).isReg() && "Offset should be in register!");
+      BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME));
+      BuildMI(MBB, MBBI, dl, TII.get(Hexagon::ADD_rr),
+              Hexagon::R29).addReg(Hexagon::R29).addReg(Hexagon::R28);
+      return;
+    }
+    // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
+    // versions.
+    if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPret
+        && !DisableDeallocRet) {
+      // Check for RESTORE_DEALLOC_RET_JMP_V4 call. Don't emit an extra DEALLOC
+      // instruction if we encounter it.
+      MachineBasicBlock::iterator BeforeJMPR =
+        MBB.begin() == MBBI ? MBBI : prior(MBBI);
+      if (BeforeJMPR != MBBI &&
+          BeforeJMPR->getOpcode() == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) {
+        // Remove the JMPR node.
+        MBB.erase(MBBI);
+        return;
+      }
+
+      // Add dealloc_return.
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::DEALLOC_RET_V4));
+      // Transfer the function live-out registers.
+      MIB->copyImplicitOps(*MBB.getParent(), &*MBBI);
+      // Remove the JUMPR node.
+      MBB.erase(MBBI);
+    } else { // Add deallocframe for V2 and V3, and V4 tail calls.
+      // Check for RESTORE_DEALLOC_BEFORE_TAILCALL_V4. We don't need an extra
+      // DEALLOCFRAME instruction after it.
+      MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
+      MachineBasicBlock::iterator I =
+        Term == MBB.begin() ? MBB.end() : prior(Term);
+      if (I != MBB.end() &&
+          I->getOpcode() == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4)
+        return;
+
+      BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME));
+    }
+  }
+}
+
+bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const HexagonMachineFunctionInfo *FuncInfo =
+    MF.getInfo<HexagonMachineFunctionInfo>();
+  return (MFI->hasCalls() || (MFI->getStackSize() > 0) ||
+          FuncInfo->hasClobberLR());
+}
+
+static inline
+unsigned uniqueSuperReg(unsigned Reg, const TargetRegisterInfo *TRI) {
+  MCSuperRegIterator SRI(Reg, TRI);
+  assert(SRI.isValid() && "Expected a superreg");
+  unsigned SuperReg = *SRI;
+  ++SRI;
+  assert(!SRI.isValid() && "Expected exactly one superreg");
+  return SuperReg;
+}
+
+bool
+HexagonFrameLowering::spillCalleeSavedRegisters(
+                                        MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+  MachineFunction *MF = MBB.getParent();
+  const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+  if (CSI.empty()) {
+    return false;
+  }
+
+  // We can only schedule double stores if we spill contiguous callee-saved
+  // regs. For instance, we cannot schedule double-word stores if we spill
+  // r24, r26, and r27.
+  // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+  // above.
+  bool ContiguousRegs = true;
+
+  for (unsigned i = 0; i < CSI.size(); ++i) {
+    unsigned Reg = CSI[i].getReg();
+
+    //
+    // Check if we can use a double-word store.
+    //
+    unsigned SuperReg = uniqueSuperReg(Reg, TRI);
+    bool CanUseDblStore = false;
+    const TargetRegisterClass* SuperRegClass = 0;
+
+    if (ContiguousRegs && (i < CSI.size()-1)) {
+      unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI);
+      SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg);
+      CanUseDblStore = (SuperRegNext == SuperReg);
+    }
+
+
+    if (CanUseDblStore) {
+      TII.storeRegToStackSlot(MBB, MI, SuperReg, true,
+                              CSI[i+1].getFrameIdx(), SuperRegClass, TRI);
+      MBB.addLiveIn(SuperReg);
+      ++i;
+    } else {
+      // Cannot use a double-word store.
+      ContiguousRegs = false;
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC,
+                              TRI);
+      MBB.addLiveIn(Reg);
+    }
+  }
+  return true;
+}
+
+
+bool HexagonFrameLowering::restoreCalleeSavedRegisters(
+                                        MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+
+  MachineFunction *MF = MBB.getParent();
+  const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+  if (CSI.empty()) {
+    return false;
+  }
+
+  // We can only schedule double loads if we spill contiguous callee-saved
+  // regs. For instance, we cannot schedule double-word loads if we spill
+  // r24, r26, and r27.
+  // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+  // above.
+  bool ContiguousRegs = true;
+
+  for (unsigned i = 0; i < CSI.size(); ++i) {
+    unsigned Reg = CSI[i].getReg();
+
+    //
+    // Check if we can use a double-word load.
+ // + unsigned SuperReg = uniqueSuperReg(Reg, TRI); + const TargetRegisterClass* SuperRegClass = 0; + bool CanUseDblLoad = false; + if (ContiguousRegs && (i < CSI.size()-1)) { + unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI); + SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg); + CanUseDblLoad = (SuperRegNext == SuperReg); + } + + + if (CanUseDblLoad) { + TII.loadRegFromStackSlot(MBB, MI, SuperReg, CSI[i+1].getFrameIdx(), + SuperRegClass, TRI); + MBB.addLiveIn(SuperReg); + ++i; + } else { + // Cannot use a double-word load. + ContiguousRegs = false; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI); + MBB.addLiveIn(Reg); + } + } + return true; +} + +void HexagonFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + MachineInstr &MI = *I; + + if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) { + // Hexagon_TODO: add code + } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) { + // Hexagon_TODO: add code + } else { + llvm_unreachable("Cannot handle this call frame pseudo instruction"); + } + MBB.erase(I); +} + +int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + return MF.getFrameInfo()->getObjectOffset(FI); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h new file mode 100644 index 000000000000..a62c76aaf676 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h @@ -0,0 +1,55 @@ +//=- HexagonFrameLowering.h - Define frame lowering for Hexagon --*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGON_FRAMEINFO_H +#define HEXAGON_FRAMEINFO_H + +#include "Hexagon.h" +#include "HexagonSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + +class HexagonFrameLowering : public TargetFrameLowering { +private: + const HexagonSubtarget &STI; + void determineFrameLayout(MachineFunction &MF) const; + +public: + explicit HexagonFrameLowering(const HexagonSubtarget &sti) + : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. 
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+  virtual bool
+  spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            const std::vector<CalleeSavedInfo> &CSI,
+                            const TargetRegisterInfo *TRI) const;
+
+  void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const;
+
+  virtual bool
+  restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MI,
+                              const std::vector<CalleeSavedInfo> &CSI,
+                              const TargetRegisterInfo *TRI) const;
+  int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+  bool hasFP(const MachineFunction &MF) const;
+  bool hasTailCall(MachineBasicBlock &MBB) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
new file mode 100644
index 000000000000..52d5ab2fee27
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -0,0 +1,1548 @@
+//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the Hexagon hardware
+// loop instruction. The hardware loop can perform loop branches with a
+// zero-cycle overhead.
+//
+// The pattern that defines the induction variable can change depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for hardware loops:
+//  - Countable loops (w/ ind. var for a trip count)
+//  - Assumes loops are normalized by IndVarSimplify
+//  - Try inner-most loops first
+//  - No nested hardware loops.
+//  - No function calls in loops.
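+//
+// As an illustrative sketch (label and register names invented), a loop like
+//   for (i = 0; i < n; i++) { body }
+// can be emitted as
+//   loop0(.LBB_body, r1)    // LOOP0_r: start address and trip count
+// .LBB_body:
+//   ...                     // body; the last packet carries :endloop0
+// so the loop-back branch itself costs no cycles.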
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hwloops" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" + +#include <algorithm> +#include <vector> + +using namespace llvm; + +#ifndef NDEBUG +static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1)); +#endif + +STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); + +namespace llvm { + void initializeHexagonHardwareLoopsPass(PassRegistry&); +} + +namespace { + class CountValue; + struct HexagonHardwareLoops : public MachineFunctionPass { + MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + MachineDominatorTree *MDT; + const HexagonTargetMachine *TM; + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; +#ifndef NDEBUG + static int Counter; +#endif + + public: + static char ID; + + HexagonHardwareLoops() : MachineFunctionPass(ID) { + initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "Hexagon Hardware Loops"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// Kinds of comparisons in the compare instructions. + struct Comparison { + enum Kind { + EQ = 0x01, + NE = 0x02, + L = 0x04, // Less-than property. + G = 0x08, // Greater-than property. + U = 0x40, // Unsigned property. + LTs = L, + LEs = L | EQ, + GTs = G, + GEs = G | EQ, + LTu = L | U, + LEu = L | EQ | U, + GTu = G | U, + GEu = G | EQ | U + }; + + static Kind getSwappedComparison(Kind Cmp) { + assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator"); + if ((Cmp & L) || (Cmp & G)) + return (Kind)(Cmp ^ (L|G)); + return Cmp; + } + }; + + /// \brief Find the register that contains the loop controlling + /// induction variable. + /// If successful, it will return true and set the \p Reg, \p IVBump + /// and \p IVOp arguments. Otherwise it will return false. + /// The returned induction register is the register R that follows the + /// following induction pattern: + /// loop: + /// R = phi ..., [ R.next, LatchBlock ] + /// R.next = R + #bump + /// if (R.next < #N) goto loop + /// IVBump is the immediate value added to R, and IVOp is the instruction + /// "R.next = R + #bump". + bool findInductionRegister(MachineLoop *L, unsigned &Reg, + int64_t &IVBump, MachineInstr *&IVOp) const; + + /// \brief Analyze the statements in a loop to determine if the loop + /// has a computable trip count and, if so, return a value that represents + /// the trip count expression. + CountValue *getLoopTripCount(MachineLoop *L, + SmallVectorImpl<MachineInstr *> &OldInsts); + + /// \brief Return the expression that represents the number of times + /// a loop iterates. The function takes the operands that represent the + /// loop start value, loop end value, and induction value. 
Based upon
+    /// these operands, the function attempts to compute the trip count.
+    /// If the trip count is not directly available (as an immediate value,
+    /// or a register), the function will attempt to insert computation of it
+    /// to the loop's preheader.
+    CountValue *computeCount(MachineLoop *Loop,
+                             const MachineOperand *Start,
+                             const MachineOperand *End,
+                             unsigned IVReg,
+                             int64_t IVBump,
+                             Comparison::Kind Cmp) const;
+
+    /// \brief Return true if the instruction is not valid within a hardware
+    /// loop.
+    bool isInvalidLoopOperation(const MachineInstr *MI) const;
+
+    /// \brief Return true if the loop contains an instruction that inhibits
+    /// using the hardware loop.
+    bool containsInvalidInstruction(MachineLoop *L) const;
+
+    /// \brief Given a loop, check if we can convert it to a hardware loop.
+    /// If so, then perform the conversion and return true.
+    bool convertToHardwareLoop(MachineLoop *L);
+
+    /// \brief Return true if the instruction is now dead.
+    bool isDead(const MachineInstr *MI,
+                SmallVectorImpl<MachineInstr *> &DeadPhis) const;
+
+    /// \brief Remove the instruction if it is now dead.
+    void removeIfDead(MachineInstr *MI);
+
+    /// \brief Make sure that the "bump" instruction executes before the
+    /// compare. We need that for the IV fixup, so that the compare
+    /// instruction would not use a bumped value that has not yet been
+    /// defined. If the instructions are out of order, try to reorder them.
+    bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
+
+    /// \brief Get the instruction that loads an immediate value into \p R,
+    /// or 0 if such an instruction does not exist.
+    MachineInstr *defWithImmediate(unsigned R);
+
+    /// \brief Get the immediate value referenced to by \p MO, either for
+    /// immediate operands, or for register operands, where the register
+    /// was defined with an immediate value.
+    int64_t getImmediate(MachineOperand &MO);
+
+    /// \brief Reset the given machine operand to now refer to a new immediate
+    /// value. Assumes that the operand was already referencing an immediate
+    /// value, either directly, or via a register.
+    void setImmediate(MachineOperand &MO, int64_t Val);
+
+    /// \brief Fix the data flow of the induction variable.
+    /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
+    ///                                     |
+    ///                                     +-> back to phi
+    /// where "bump" is the increment of the induction variable:
+    ///   iv = iv + #const.
+    /// Due to some prior code transformations, the actual flow may look
+    /// like this:
+    ///   phi -+-> bump ---> back to phi
+    ///        |
+    ///        +-> comparison-in-latch (against upper_bound-bump),
+    /// i.e. the comparison that controls the loop execution may be using
+    /// the value of the induction variable from before the increment.
+    ///
+    /// Return true if the loop's flow is the desired one (i.e. it's
+    /// either been fixed, or no fixing was necessary).
+    /// Otherwise, return false. This can happen if the induction variable
+    /// couldn't be identified, or if the value in the latch's comparison
+    /// cannot be adjusted to reflect the post-bump value.
+    bool fixupInductionVariable(MachineLoop *L);
+
+    /// \brief Given a loop, if it does not have a preheader, create one.
+    /// Return the block that is the preheader.
+    MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
+  };
+
+  char HexagonHardwareLoops::ID = 0;
+#ifndef NDEBUG
+  int HexagonHardwareLoops::Counter = 0;
+#endif
+
+  /// \brief Abstraction for a trip count of a loop.
A smaller version
+  /// of the MachineOperand class without the concerns of changing the
+  /// operand representation.
+  class CountValue {
+  public:
+    enum CountValueType {
+      CV_Register,
+      CV_Immediate
+    };
+  private:
+    CountValueType Kind;
+    union Values {
+      struct {
+        unsigned Reg;
+        unsigned Sub;
+      } R;
+      unsigned ImmVal;
+    } Contents;
+
+  public:
+    explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
+      Kind = t;
+      if (Kind == CV_Register) {
+        Contents.R.Reg = v;
+        Contents.R.Sub = u;
+      } else {
+        Contents.ImmVal = v;
+      }
+    }
+    bool isReg() const { return Kind == CV_Register; }
+    bool isImm() const { return Kind == CV_Immediate; }
+
+    unsigned getReg() const {
+      assert(isReg() && "Wrong CountValue accessor");
+      return Contents.R.Reg;
+    }
+    unsigned getSubReg() const {
+      assert(isReg() && "Wrong CountValue accessor");
+      return Contents.R.Sub;
+    }
+    unsigned getImm() const {
+      assert(isImm() && "Wrong CountValue accessor");
+      return Contents.ImmVal;
+    }
+
+    void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
+      const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
+      if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
+      if (isImm()) { OS << Contents.ImmVal; }
+    }
+  };
+} // end anonymous namespace
+
+
+INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
+                      "Hexagon Hardware Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
+                    "Hexagon Hardware Loops", false, false)
+
+
+/// \brief Returns true if the instruction is a hardware loop instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+  return MI->getOpcode() == Hexagon::LOOP0_r ||
+         MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+FunctionPass *llvm::createHexagonHardwareLoops() {
+  return new HexagonHardwareLoops();
+}
+
+
+bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
+
+  bool Changed = false;
+
+  MLI = &getAnalysis<MachineLoopInfo>();
+  MRI = &MF.getRegInfo();
+  MDT = &getAnalysis<MachineDominatorTree>();
+  TM  = static_cast<const HexagonTargetMachine*>(&MF.getTarget());
+  TII = static_cast<const HexagonInstrInfo*>(TM->getInstrInfo());
+  TRI = static_cast<const HexagonRegisterInfo*>(TM->getRegisterInfo());
+
+  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+       I != E; ++I) {
+    MachineLoop *L = *I;
+    if (!L->getParentLoop())
+      Changed |= convertToHardwareLoop(L);
+  }
+
+  return Changed;
+}
+
+
+bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
+                                                 unsigned &Reg,
+                                                 int64_t &IVBump,
+                                                 MachineInstr *&IVOp
+                                                 ) const {
+  MachineBasicBlock *Header = L->getHeader();
+  MachineBasicBlock *Preheader = L->getLoopPreheader();
+  MachineBasicBlock *Latch = L->getLoopLatch();
+  if (!Header || !Preheader || !Latch)
+    return false;
+
+  // This pair represents an induction register together with an immediate
+  // value that will be added to it in each loop iteration.
+  typedef std::pair<unsigned,int64_t> RegisterBump;
+
+  // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
+  // from an induction operation
+  //   R.next = R + bump
+  // where bump is an immediate value.
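+  // E.g. (sketch): for a loop header containing
+  //   %vreg1 = PHI %vreg0, <preheader>, %vreg2, <latch>
+  //   %vreg2 = ADD_ri %vreg1, #1
+  // the map gets the entry %vreg2 -> (%vreg1, 1).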
+  typedef std::map<unsigned,RegisterBump> InductionMap;
+
+  InductionMap IndMap;
+
+  typedef MachineBasicBlock::instr_iterator instr_iterator;
+  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+       I != E && I->isPHI(); ++I) {
+    MachineInstr *Phi = &*I;
+
+    // Have a PHI instruction. Get the operand that corresponds to the
+    // latch block, and see if it is a result of an addition of the form
+    // "reg+imm", where the "reg" is defined by the PHI node we are looking at.
+    for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
+      if (Phi->getOperand(i+1).getMBB() != Latch)
+        continue;
+
+      unsigned PhiOpReg = Phi->getOperand(i).getReg();
+      MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
+      unsigned UpdOpc = DI->getOpcode();
+      bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+
+      if (isAdd) {
+        // If the register operand to the add is the PHI we're
+        // looking at, this meets the induction pattern.
+        unsigned IndReg = DI->getOperand(1).getReg();
+        if (MRI->getVRegDef(IndReg) == Phi) {
+          unsigned UpdReg = DI->getOperand(0).getReg();
+          int64_t V = DI->getOperand(2).getImm();
+          IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
+        }
+      }
+    }  // for (i)
+  }  // for (instr)
+
+  SmallVector<MachineOperand,2> Cond;
+  MachineBasicBlock *TB = 0, *FB = 0;
+  bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+  if (NotAnalyzed)
+    return false;
+
+  unsigned CSz = Cond.size();
+  assert (CSz == 1 || CSz == 2);
+  unsigned PredR = Cond[CSz-1].getReg();
+
+  MachineInstr *PredI = MRI->getVRegDef(PredR);
+  if (!PredI->isCompare())
+    return false;
+
+  unsigned CmpReg1 = 0, CmpReg2 = 0;
+  int CmpImm = 0, CmpMask = 0;
+  bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2,
+                                         CmpMask, CmpImm);
+  // Fail if the compare was not analyzed, or it's not comparing a register
+  // with an immediate value. Not checking the mask here, since we handle
+  // the individual compare opcodes (including CMPb) later on.
+  if (!CmpAnalyzed)
+    return false;
+
+  // Exactly one of the input registers to the comparison should be among
+  // the induction registers.
+  InductionMap::iterator IndMapEnd = IndMap.end();
+  InductionMap::iterator F = IndMapEnd;
+  if (CmpReg1 != 0) {
+    InductionMap::iterator F1 = IndMap.find(CmpReg1);
+    if (F1 != IndMapEnd)
+      F = F1;
+  }
+  if (CmpReg2 != 0) {
+    InductionMap::iterator F2 = IndMap.find(CmpReg2);
+    if (F2 != IndMapEnd) {
+      if (F != IndMapEnd)
+        return false;
+      F = F2;
+    }
+  }
+  if (F == IndMapEnd)
+    return false;
+
+  Reg = F->second.first;
+  IVBump = F->second.second;
+  IVOp = MRI->getVRegDef(F->first);
+  return true;
+}
+
+
+/// \brief Analyze the statements in a loop to determine if the loop has
+/// a computable trip count and, if so, return a value that represents
+/// the trip count expression.
+///
+/// This function iterates over the phi nodes in the loop to check for
+/// induction variable patterns that are used in the calculation for
+/// the number of times the loop is executed.
+CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
+                                    SmallVectorImpl<MachineInstr *> &OldInsts) {
+  MachineBasicBlock *TopMBB = L->getTopBlock();
+  MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+  assert(PI != TopMBB->pred_end() &&
+         "Loop must have more than one incoming edge!");
+  MachineBasicBlock *Backedge = *PI++;
+  if (PI == TopMBB->pred_end())  // dead loop?
+    return 0;
+  MachineBasicBlock *Incoming = *PI++;
+  if (PI != TopMBB->pred_end())  // multiple backedges?
+ return 0; + + // Make sure there is one incoming and one backedge and determine which + // is which. + if (L->contains(Incoming)) { + if (L->contains(Backedge)) + return 0; + std::swap(Incoming, Backedge); + } else if (!L->contains(Backedge)) + return 0; + + // Look for the cmp instruction to determine if we can get a useful trip + // count. The trip count can be either a register or an immediate. The + // location of the value depends upon the type (reg or imm). + MachineBasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return 0; + + unsigned IVReg = 0; + int64_t IVBump = 0; + MachineInstr *IVOp; + bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); + if (!FoundIV) + return 0; + + MachineBasicBlock *Preheader = L->getLoopPreheader(); + + MachineOperand *InitialValue = 0; + MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); + for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { + MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); + if (MBB == Preheader) + InitialValue = &IV_Phi->getOperand(i); + else if (MBB == Latch) + IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. + } + if (!InitialValue) + return 0; + + SmallVector<MachineOperand,2> Cond; + MachineBasicBlock *TB = 0, *FB = 0; + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); + if (NotAnalyzed) + return 0; + + MachineBasicBlock *Header = L->getHeader(); + // TB must be non-null. If FB is also non-null, one of them must be + // the header. Otherwise, branch to TB could be exiting the loop, and + // the fall through can go to the header. + assert (TB && "Latch block without a branch?"); + assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); + if (!TB || (FB && TB != Header && FB != Header)) + return 0; + + // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch + // to put imm(0), followed by P in the vector Cond. + // If TB is not the header, it means that the "not-taken" path must lead + // to the header. + bool Negated = (Cond.size() > 1) ^ (TB != Header); + unsigned PredReg = Cond[Cond.size()-1].getReg(); + MachineInstr *CondI = MRI->getVRegDef(PredReg); + unsigned CondOpc = CondI->getOpcode(); + + unsigned CmpReg1 = 0, CmpReg2 = 0; + int Mask = 0, ImmValue = 0; + bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2, + Mask, ImmValue); + if (!AnalyzedCmp) + return 0; + + // The comparison operator type determines how we compute the loop + // trip count. + OldInsts.push_back(CondI); + OldInsts.push_back(IVOp); + + // Sadly, the following code gets information based on the position + // of the operands in the compare instruction. This has to be done + // this way, because the comparisons check for a specific relationship + // between the operands (e.g. is-less-than), rather than to find out + // what relationship the operands are in (as on PPC). + Comparison::Kind Cmp; + bool isSwapped = false; + const MachineOperand &Op1 = CondI->getOperand(1); + const MachineOperand &Op2 = CondI->getOperand(2); + const MachineOperand *EndValue = 0; + + if (Op1.isReg()) { + if (Op2.isImm() || Op1.getReg() == IVReg) + EndValue = &Op2; + else { + EndValue = &Op1; + isSwapped = true; + } + } + + if (!EndValue) + return 0; + + switch (CondOpc) { + case Hexagon::CMPEQri: + case Hexagon::CMPEQrr: + Cmp = !Negated ? Comparison::EQ : Comparison::NE; + break; + case Hexagon::CMPGTUri: + case Hexagon::CMPGTUrr: + Cmp = !Negated ? 
Comparison::GTu : Comparison::LEu; + break; + case Hexagon::CMPGTri: + case Hexagon::CMPGTrr: + Cmp = !Negated ? Comparison::GTs : Comparison::LEs; + break; + // Very limited support for byte/halfword compares. + case Hexagon::CMPbEQri_V4: + case Hexagon::CMPhEQri_V4: { + if (IVBump != 1) + return 0; + + int64_t InitV, EndV; + // Since the comparisons are "ri", the EndValue should be an + // immediate. Check it just in case. + assert(EndValue->isImm() && "Unrecognized latch comparison"); + EndV = EndValue->getImm(); + // Allow InitialValue to be a register defined with an immediate. + if (InitialValue->isReg()) { + if (!defWithImmediate(InitialValue->getReg())) + return 0; + InitV = getImmediate(*InitialValue); + } else { + assert(InitialValue->isImm()); + InitV = InitialValue->getImm(); + } + if (InitV >= EndV) + return 0; + if (CondOpc == Hexagon::CMPbEQri_V4) { + if (!isInt<8>(InitV) || !isInt<8>(EndV)) + return 0; + } else { // Hexagon::CMPhEQri_V4 + if (!isInt<16>(InitV) || !isInt<16>(EndV)) + return 0; + } + Cmp = !Negated ? Comparison::EQ : Comparison::NE; + break; + } + default: + return 0; + } + + if (isSwapped) + Cmp = Comparison::getSwappedComparison(Cmp); + + if (InitialValue->isReg()) { + unsigned R = InitialValue->getReg(); + MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); + if (!MDT->properlyDominates(DefBB, Header)) + return 0; + OldInsts.push_back(MRI->getVRegDef(R)); + } + if (EndValue->isReg()) { + unsigned R = EndValue->getReg(); + MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); + if (!MDT->properlyDominates(DefBB, Header)) + return 0; + } + + return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); +} + +/// \brief Helper function that returns the expression that represents the +/// number of times a loop iterates. The function takes the operands that +/// represent the loop start value, loop end value, and induction value. +/// Based upon these operands, the function attempts to compute the trip count. +CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, + const MachineOperand *Start, + const MachineOperand *End, + unsigned IVReg, + int64_t IVBump, + Comparison::Kind Cmp) const { + // Cannot handle comparison EQ, i.e. while (A == B). + if (Cmp == Comparison::EQ) + return 0; + + // Check if either the start or end values are an assignment of an immediate. + // If so, use the immediate value rather than the register. + if (Start->isReg()) { + const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg()); + if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI) + Start = &StartValInstr->getOperand(1); + } + if (End->isReg()) { + const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); + if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI) + End = &EndValInstr->getOperand(1); + } + + assert (Start->isReg() || Start->isImm()); + assert (End->isReg() || End->isImm()); + + bool CmpLess = Cmp & Comparison::L; + bool CmpGreater = Cmp & Comparison::G; + bool CmpHasEqual = Cmp & Comparison::EQ; + + // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds. + // If loop executes while iv is "less" with the iv value going down, then + // the iv must wrap. + if (CmpLess && IVBump < 0) + return 0; + // If loop executes while iv is "greater" with the iv value going up, then + // the iv must wrap. + if (CmpGreater && IVBump > 0) + return 0; + + if (Start->isImm() && End->isImm()) { + // Both, start and end are immediates. 
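+    // Worked example (hypothetical values): for a loop equivalent to
+    // "for (i = 0; i < 10; i += 3)" we have StartV = 0, EndV = 10 and
+    // IVBump = 3, so Dist = 10 and the normalized distance computed
+    // below is Dist1 = (10 + 2) / 3 = 4 iterations.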
+ int64_t StartV = Start->getImm(); + int64_t EndV = End->getImm(); + int64_t Dist = EndV - StartV; + if (Dist == 0) + return 0; + + bool Exact = (Dist % IVBump) == 0; + + if (Cmp == Comparison::NE) { + if (!Exact) + return 0; + if ((Dist < 0) ^ (IVBump < 0)) + return 0; + } + + // For comparisons that include the final value (i.e. include equality + // with the final value), we need to increase the distance by 1. + if (CmpHasEqual) + Dist = Dist > 0 ? Dist+1 : Dist-1; + + // assert (CmpLess => Dist > 0); + assert ((!CmpLess || Dist > 0) && "Loop should never iterate!"); + // assert (CmpGreater => Dist < 0); + assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!"); + + // "Normalized" distance, i.e. with the bump set to +-1. + int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump + : (-Dist + (-IVBump-1)) / (-IVBump); + assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign."); + + uint64_t Count = Dist1; + + if (Count > 0xFFFFFFFFULL) + return 0; + + return new CountValue(CountValue::CV_Immediate, Count); + } + + // A general case: Start and End are some values, but the actual + // iteration count may not be available. If it is not, insert + // a computation of it into the preheader. + + // If the induction variable bump is not a power of 2, quit. + // Othwerise we'd need a general integer division. + if (!isPowerOf2_64(abs64(IVBump))) + return 0; + + MachineBasicBlock *PH = Loop->getLoopPreheader(); + assert (PH && "Should have a preheader by now"); + MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); + DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc() + : DebugLoc(); + + // If Start is an immediate and End is a register, the trip count + // will be "reg - imm". Hexagon's "subtract immediate" instruction + // is actually "reg + -imm". + + // If the loop IV is going downwards, i.e. if the bump is negative, + // then the iteration count (computed as End-Start) will need to be + // negated. To avoid the negation, just swap Start and End. + if (IVBump < 0) { + std::swap(Start, End); + IVBump = -IVBump; + } + // Cmp may now have a wrong direction, e.g. LEs may now be GEs. + // Signedness, and "including equality" are preserved. + + bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm) + bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg) + + int64_t StartV = 0, EndV = 0; + if (Start->isImm()) + StartV = Start->getImm(); + if (End->isImm()) + EndV = End->getImm(); + + int64_t AdjV = 0; + // To compute the iteration count, we would need this computation: + // Count = (End - Start + (IVBump-1)) / IVBump + // or, when CmpHasEqual: + // Count = (End - Start + (IVBump-1)+1) / IVBump + // The "IVBump-1" part is the adjustment (AdjV). We can avoid + // generating an instruction specifically to add it if we can adjust + // the immediate values for Start or End. + + if (CmpHasEqual) { + // Need to add 1 to the total iteration count. + if (Start->isImm()) + StartV--; + else if (End->isImm()) + EndV++; + else + AdjV += 1; + } + + if (Cmp != Comparison::NE) { + if (Start->isImm()) + StartV -= (IVBump-1); + else if (End->isImm()) + EndV += (IVBump-1); + else + AdjV += (IVBump-1); + } + + unsigned R = 0, SR = 0; + if (Start->isReg()) { + R = Start->getReg(); + SR = Start->getSubReg(); + } else { + R = End->getReg(); + SR = End->getSubReg(); + } + const TargetRegisterClass *RC = MRI->getRegClass(R); + // Hardware loops cannot handle 64-bit registers. If it's a double + // register, it has to have a subregister. 
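+  // Sketch of the register case handled below (assuming IVBump == 4 and
+  // both bounds in registers), emitted into the preheader:
+  //   DistR  = SUB_rr End, Start
+  //   AdjR   = ADD_ri DistR, #AdjV     // only when AdjV != 0
+  //   CountR = LSR_ri AdjR, #2         // shift by log2(IVBump)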
+ if (!SR && RC == &Hexagon::DoubleRegsRegClass) + return 0; + const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; + + // Compute DistR (register with the distance between Start and End). + unsigned DistR, DistSR; + + // Avoid special case, where the start value is an imm(0). + if (Start->isImm() && StartV == 0) { + DistR = End->getReg(); + DistSR = End->getSubReg(); + } else { + const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) : + (RegToImm ? TII->get(Hexagon::SUB_ri) : + TII->get(Hexagon::ADD_ri)); + unsigned SubR = MRI->createVirtualRegister(IntRC); + MachineInstrBuilder SubIB = + BuildMI(*PH, InsertPos, DL, SubD, SubR); + + if (RegToReg) { + SubIB.addReg(End->getReg(), 0, End->getSubReg()) + .addReg(Start->getReg(), 0, Start->getSubReg()); + } else if (RegToImm) { + SubIB.addImm(EndV) + .addReg(Start->getReg(), 0, Start->getSubReg()); + } else { // ImmToReg + SubIB.addReg(End->getReg(), 0, End->getSubReg()) + .addImm(-StartV); + } + DistR = SubR; + DistSR = 0; + } + + // From DistR, compute AdjR (register with the adjusted distance). + unsigned AdjR, AdjSR; + + if (AdjV == 0) { + AdjR = DistR; + AdjSR = DistSR; + } else { + // Generate CountR = ADD DistR, AdjVal + unsigned AddR = MRI->createVirtualRegister(IntRC); + const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri); + BuildMI(*PH, InsertPos, DL, AddD, AddR) + .addReg(DistR, 0, DistSR) + .addImm(AdjV); + + AdjR = AddR; + AdjSR = 0; + } + + // From AdjR, compute CountR (register with the final count). + unsigned CountR, CountSR; + + if (IVBump == 1) { + CountR = AdjR; + CountSR = AdjSR; + } else { + // The IV bump is a power of two. Log_2(IV bump) is the shift amount. + unsigned Shift = Log2_32(IVBump); + + // Generate NormR = LSR DistR, Shift. + unsigned LsrR = MRI->createVirtualRegister(IntRC); + const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri); + BuildMI(*PH, InsertPos, DL, LsrD, LsrR) + .addReg(AdjR, 0, AdjSR) + .addImm(Shift); + + CountR = LsrR; + CountSR = 0; + } + + return new CountValue(CountValue::CV_Register, CountR, CountSR); +} + + +/// \brief Return true if the operation is invalid within hardware loop. +bool HexagonHardwareLoops::isInvalidLoopOperation( + const MachineInstr *MI) const { + + // call is not allowed because the callee may use a hardware loop + if (MI->getDesc().isCall()) + return true; + + // do not allow nested hardware loops + if (isHardwareLoop(MI)) + return true; + + // check if the instruction defines a hardware loop register + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (R == Hexagon::LC0 || R == Hexagon::LC1 || + R == Hexagon::SA0 || R == Hexagon::SA1) + return true; + } + return false; +} + + +/// \brief - Return true if the loop contains an instruction that inhibits +/// the use of the hardware loop function. +bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const { + const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *MBB = Blocks[i]; + for (MachineBasicBlock::iterator + MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { + const MachineInstr *MI = &*MII; + if (isInvalidLoopOperation(MI)) + return true; + } + } + return false; +} + + +/// \brief Returns true if the instruction is dead. 
This was essentially +/// copied from DeadMachineInstructionElim::isDead, but with special cases +/// for inline asm, physical registers and instructions with side effects +/// removed. +bool HexagonHardwareLoops::isDead(const MachineInstr *MI, + SmallVectorImpl<MachineInstr *> &DeadPhis) const { + // Examine each operand. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + + unsigned Reg = MO.getReg(); + if (MRI->use_nodbg_empty(Reg)) + continue; + + typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator; + + // This instruction has users, but if the only user is the phi node for the + // parent block, and the only use of that phi node is this instruction, then + // this instruction is dead: both it (and the phi node) can be removed. + use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); + use_nodbg_iterator End = MRI->use_nodbg_end(); + if (llvm::next(I) != End || !I.getOperand().getParent()->isPHI()) + return false; + + MachineInstr *OnePhi = I.getOperand().getParent(); + for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { + const MachineOperand &OPO = OnePhi->getOperand(j); + if (!OPO.isReg() || !OPO.isDef()) + continue; + + unsigned OPReg = OPO.getReg(); + use_nodbg_iterator nextJ; + for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); + J != End; J = nextJ) { + nextJ = llvm::next(J); + MachineOperand &Use = J.getOperand(); + MachineInstr *UseMI = Use.getParent(); + + // If the phi node has a user that is not MI, bail... + if (MI != UseMI) + return false; + } + } + DeadPhis.push_back(OnePhi); + } + + // If there are no defs with uses, the instruction is dead. + return true; +} + +void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { + // This procedure was essentially copied from DeadMachineInstructionElim. + + SmallVector<MachineInstr*, 1> DeadPhis; + if (isDead(MI, DeadPhis)) { + DEBUG(dbgs() << "HW looping will remove: " << *MI); + + // It is possible that some DBG_VALUE instructions refer to this + // instruction. Examine each def operand for such references; + // if found, mark the DBG_VALUE as undef (but don't delete it). + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + MachineRegisterInfo::use_iterator nextI; + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), + E = MRI->use_end(); I != E; I = nextI) { + nextI = llvm::next(I); // I is invalidated by the setReg + MachineOperand &Use = I.getOperand(); + MachineInstr *UseMI = Use.getParent(); + if (UseMI == MI) + continue; + if (Use.isDebug()) + UseMI->getOperand(0).setReg(0U); + // This may also be a "instr -> phi -> instr" case which can + // be removed too. + } + } + + MI->eraseFromParent(); + for (unsigned i = 0; i < DeadPhis.size(); ++i) + DeadPhis[i]->eraseFromParent(); + } +} + +/// \brief Check if the loop is a candidate for converting to a hardware +/// loop. If so, then perform the transformation. +/// +/// This function works on innermost loops first. A loop can be converted +/// if it is a counting loop; either a register value or an immediate. +/// +/// The code makes several assumptions about the representation of the loop +/// in llvm. +bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { + // This is just for sanity. 
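+  // In outline, the code below first converts any inner loops, then
+  // rejects loops with invalid instructions or an unknown trip count,
+  // materializes the count in the preheader, emits LOOP0_r/LOOP0_i
+  // there, and finally replaces the latch branch with ENDLOOP0.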
+ assert(L->getHeader() && "Loop without a header?"); + + bool Changed = false; + // Process nested loops first. + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + Changed |= convertToHardwareLoop(*I); + + // If a nested loop has been converted, then we can't convert this loop. + if (Changed) + return Changed; + +#ifndef NDEBUG + // Stop trying after reaching the limit (if any). + int Limit = HWLoopLimit; + if (Limit >= 0) { + if (Counter >= HWLoopLimit) + return false; + Counter++; + } +#endif + + // Does the loop contain any invalid instructions? + if (containsInvalidInstruction(L)) + return false; + + // Is the induction variable bump feeding the latch condition? + if (!fixupInductionVariable(L)) + return false; + + MachineBasicBlock *LastMBB = L->getExitingBlock(); + // Don't generate hw loop if the loop has more than one exit. + if (LastMBB == 0) + return false; + + MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); + if (LastI == LastMBB->end()) + return false; + + // Ensure the loop has a preheader: the loop instruction will be + // placed there. + bool NewPreheader = false; + MachineBasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + Preheader = createPreheaderForLoop(L); + if (!Preheader) + return false; + NewPreheader = true; + } + MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); + + SmallVector<MachineInstr*, 2> OldInsts; + // Are we able to determine the trip count for the loop? + CountValue *TripCount = getLoopTripCount(L, OldInsts); + if (TripCount == 0) + return false; + + // Is the trip count available in the preheader? + if (TripCount->isReg()) { + // There will be a use of the register inserted into the preheader, + // so make sure that the register is actually defined at that point. + MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); + MachineBasicBlock *BBDef = TCDef->getParent(); + if (!NewPreheader) { + if (!MDT->dominates(BBDef, Preheader)) + return false; + } else { + // If we have just created a preheader, the dominator tree won't be + // aware of it. Check if the definition of the register dominates + // the header, but is not the header itself. + if (!MDT->properlyDominates(BBDef, L->getHeader())) + return false; + } + } + + // Determine the loop start. + MachineBasicBlock *LoopStart = L->getTopBlock(); + if (L->getLoopLatch() != LastMBB) { + // When the exit and latch are not the same, use the latch block as the + // start. + // The loop start address is used only after the 1st iteration, and the + // loop latch may contains instrs. that need to be executed after the + // first iteration. + LoopStart = L->getLoopLatch(); + // Make sure the latch is a successor of the exit, otherwise it won't work. + if (!LastMBB->isSuccessor(LoopStart)) + return false; + } + + // Convert the loop to a hardware loop. + DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); + DebugLoc DL; + if (InsertPos != Preheader->end()) + DL = InsertPos->getDebugLoc(); + + if (TripCount->isReg()) { + // Create a copy of the loop count register. + unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) + .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); + // Add the Loop instruction to the beginning of the loop. 
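+    // In Hexagon assembly this is, roughly,
+    //   loop0(.LBB_start, Rcount)
+    // paired with the endloop0 emitted at the latch further down
+    // (the label and register names are illustrative).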
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r)) + .addMBB(LoopStart) + .addReg(CountReg); + } else { + assert(TripCount->isImm() && "Expecting immediate value for trip count"); + // Add the Loop immediate instruction to the beginning of the loop, + // if the immediate fits in the instructions. Otherwise, we need to + // create a new virtual register. + int64_t CountImm = TripCount->getImm(); + if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) { + unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg) + .addImm(CountImm); + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r)) + .addMBB(LoopStart).addReg(CountReg); + } else + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i)) + .addMBB(LoopStart).addImm(CountImm); + } + + // Make sure the loop start always has a reference in the CFG. We need + // to create a BlockAddress operand to get this mechanism to work both the + // MachineBasicBlock and BasicBlock objects need the flag set. + LoopStart->setHasAddressTaken(); + // This line is needed to set the hasAddressTaken flag on the BasicBlock + // object. + BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock())); + + // Replace the loop branch with an endloop instruction. + DebugLoc LastIDL = LastI->getDebugLoc(); + BuildMI(*LastMBB, LastI, LastIDL, + TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart); + + // The loop ends with either: + // - a conditional branch followed by an unconditional branch, or + // - a conditional branch to the loop start. + if (LastI->getOpcode() == Hexagon::JMP_t || + LastI->getOpcode() == Hexagon::JMP_f) { + // Delete one and change/add an uncond. branch to out of the loop. + MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); + LastI = LastMBB->erase(LastI); + if (!L->contains(BranchTarget)) { + if (LastI != LastMBB->end()) + LastI = LastMBB->erase(LastI); + SmallVector<MachineOperand, 0> Cond; + TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, LastIDL); + } + } else { + // Conditional branch to loop start; just delete it. + LastMBB->erase(LastI); + } + delete TripCount; + + // The induction operation and the comparison may now be + // unneeded. If these are unneeded, then remove them. + for (unsigned i = 0; i < OldInsts.size(); ++i) + removeIfDead(OldInsts[i]); + + ++NumHWLoops; + return true; +} + + +bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, + MachineInstr *CmpI) { + assert (BumpI != CmpI && "Bump and compare in the same instruction?"); + + MachineBasicBlock *BB = BumpI->getParent(); + if (CmpI->getParent() != BB) + return false; + + typedef MachineBasicBlock::instr_iterator instr_iterator; + // Check if things are in order to begin with. + for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I) + if (&*I == CmpI) + return true; + + // Out of order. + unsigned PredR = CmpI->getOperand(0).getReg(); + bool FoundBump = false; + instr_iterator CmpIt = CmpI, NextIt = llvm::next(CmpIt); + for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) { + MachineInstr *In = &*I; + for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) { + MachineOperand &MO = In->getOperand(i); + if (MO.isReg() && MO.isUse()) { + if (MO.getReg() == PredR) // Found an intervening use of PredR. 
+ return false; + } + } + + if (In == BumpI) { + instr_iterator After = BumpI; + instr_iterator From = CmpI; + BB->splice(llvm::next(After), BB, From); + FoundBump = true; + break; + } + } + assert (FoundBump && "Cannot determine instruction order"); + return FoundBump; +} + + +MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) { + MachineInstr *DI = MRI->getVRegDef(R); + unsigned DOpc = DI->getOpcode(); + switch (DOpc) { + case Hexagon::TFRI: + case Hexagon::TFRI64: + case Hexagon::CONST32_Int_Real: + case Hexagon::CONST64_Int_Real: + return DI; + } + return 0; +} + + +int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) { + if (MO.isImm()) + return MO.getImm(); + assert(MO.isReg()); + unsigned R = MO.getReg(); + MachineInstr *DI = defWithImmediate(R); + assert(DI && "Need an immediate operand"); + // All currently supported "define-with-immediate" instructions have the + // actual immediate value in the operand(1). + int64_t v = DI->getOperand(1).getImm(); + return v; +} + + +void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { + if (MO.isImm()) { + MO.setImm(Val); + return; + } + + assert(MO.isReg()); + unsigned R = MO.getReg(); + MachineInstr *DI = defWithImmediate(R); + if (MRI->hasOneNonDBGUse(R)) { + // If R has only one use, then just change its defining instruction to + // the new immediate value. + DI->getOperand(1).setImm(Val); + return; + } + + const TargetRegisterClass *RC = MRI->getRegClass(R); + unsigned NewR = MRI->createVirtualRegister(RC); + MachineBasicBlock &B = *DI->getParent(); + DebugLoc DL = DI->getDebugLoc(); + BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR) + .addImm(Val); + MO.setReg(NewR); +} + + +bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Preheader = L->getLoopPreheader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + + if (!Header || !Preheader || !Latch) + return false; + + // These data structures follow the same concept as the corresponding + // ones in findInductionRegister (where some comments are). + typedef std::pair<unsigned,int64_t> RegisterBump; + typedef std::pair<unsigned,RegisterBump> RegisterInduction; + typedef std::set<RegisterInduction> RegisterInductionSet; + + // Register candidates for induction variables, with their associated bumps. + RegisterInductionSet IndRegs; + + // Look for induction patterns: + // vreg1 = PHI ..., [ latch, vreg2 ] + // vreg2 = ADD vreg1, imm + typedef MachineBasicBlock::instr_iterator instr_iterator; + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *Phi = &*I; + + // Have a PHI instruction. + for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { + if (Phi->getOperand(i+1).getMBB() != Latch) + continue; + + unsigned PhiReg = Phi->getOperand(i).getReg(); + MachineInstr *DI = MRI->getVRegDef(PhiReg); + unsigned UpdOpc = DI->getOpcode(); + bool isAdd = (UpdOpc == Hexagon::ADD_ri); + + if (isAdd) { + // If the register operand to the add/sub is the PHI we are looking + // at, this meets the induction pattern. 
+ unsigned IndReg = DI->getOperand(1).getReg(); + if (MRI->getVRegDef(IndReg) == Phi) { + unsigned UpdReg = DI->getOperand(0).getReg(); + int64_t V = DI->getOperand(2).getImm(); + IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); + } + } + } // for (i) + } // for (instr) + + if (IndRegs.empty()) + return false; + + MachineBasicBlock *TB = 0, *FB = 0; + SmallVector<MachineOperand,2> Cond; + // AnalyzeBranch returns true if it fails to analyze branch. + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); + if (NotAnalyzed) + return false; + + // Check if the latch branch is unconditional. + if (Cond.empty()) + return false; + + if (TB != Header && FB != Header) + // The latch does not go back to the header. Not a latch we know and love. + return false; + + // Expecting a predicate register as a condition. It won't be a hardware + // predicate register at this point yet, just a vreg. + // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0) + // into Cond, followed by the predicate register. For non-negated branches + // it's just the register. + unsigned CSz = Cond.size(); + if (CSz != 1 && CSz != 2) + return false; + + unsigned P = Cond[CSz-1].getReg(); + MachineInstr *PredDef = MRI->getVRegDef(P); + + if (!PredDef->isCompare()) + return false; + + SmallSet<unsigned,2> CmpRegs; + MachineOperand *CmpImmOp = 0; + + // Go over all operands to the compare and look for immediate and register + // operands. Assume that if the compare has a single register use and a + // single immediate operand, then the register is being compared with the + // immediate value. + for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg()) { + // Skip all implicit references. In one case there was: + // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use> + if (MO.isImplicit()) + continue; + if (MO.isUse()) { + unsigned R = MO.getReg(); + if (!defWithImmediate(R)) { + CmpRegs.insert(MO.getReg()); + continue; + } + // Consider the register to be the "immediate" operand. + if (CmpImmOp) + return false; + CmpImmOp = &MO; + } + } else if (MO.isImm()) { + if (CmpImmOp) // A second immediate argument? Confusing. Bail out. + return false; + CmpImmOp = &MO; + } + } + + if (CmpRegs.empty()) + return false; + + // Check if the compared register follows the order we want. Fix if needed. + for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end(); + I != E; ++I) { + // This is a success. If the register used in the comparison is one that + // we have identified as a bumped (updated) induction register, there is + // nothing to do. + if (CmpRegs.count(I->first)) + return true; + + // Otherwise, if the register being compared comes out of a PHI node, + // and has been recognized as following the induction pattern, and is + // compared against an immediate, we can fix it. + const RegisterBump &RB = I->second; + if (CmpRegs.count(RB.first)) { + if (!CmpImmOp) + return false; + + int64_t CmpImm = getImmediate(*CmpImmOp); + int64_t V = RB.second; + if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit). + return false; + if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit). + return false; + CmpImm += V; + // Some forms of cmp-immediate allow u9 and s10. Assume the worst case + // scenario, i.e. an 8-bit value. + if (CmpImmOp->isImm() && !isInt<8>(CmpImm)) + return false; + + // Make sure that the compare happens after the bump. 
Otherwise, + // after the fixup, the compare would use a yet-undefined register. + MachineInstr *BumpI = MRI->getVRegDef(I->first); + bool Order = orderBumpCompare(BumpI, PredDef); + if (!Order) + return false; + + // Finally, fix the compare instruction. + setImmediate(*CmpImmOp, CmpImm); + for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg() && MO.getReg() == RB.first) { + MO.setReg(I->first); + return true; + } + } + } + } + + return false; +} + + +/// \brief Create a preheader for a given loop. +MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( + MachineLoop *L) { + if (MachineBasicBlock *TmpPH = L->getLoopPreheader()) + return TmpPH; + + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + MachineFunction *MF = Header->getParent(); + DebugLoc DL; + + if (!Latch || Header->hasAddressTaken()) + return 0; + + typedef MachineBasicBlock::instr_iterator instr_iterator; + + // Verify that all existing predecessors have analyzable branches + // (or no branches at all). + typedef std::vector<MachineBasicBlock*> MBBVector; + MBBVector Preds(Header->pred_begin(), Header->pred_end()); + SmallVector<MachineOperand,2> Tmp1; + MachineBasicBlock *TB = 0, *FB = 0; + + if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false)) + return 0; + + for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { + MachineBasicBlock *PB = *I; + if (PB != Latch) { + bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false); + if (NotAnalyzed) + return 0; + } + } + + MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); + MF->insert(Header, NewPH); + + if (Header->pred_size() > 2) { + // Ensure that the header has only two predecessors: the preheader and + // the loop latch. Any additional predecessors of the header should + // join at the newly created preheader. Inspect all PHI nodes from the + // header and create appropriate corresponding PHI nodes in the preheader. + + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *PN = &*I; + + const MCInstrDesc &PD = TII->get(TargetOpcode::PHI); + MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); + NewPH->insert(NewPH->end(), NewPN); + + unsigned PR = PN->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(PR); + unsigned NewPR = MRI->createVirtualRegister(RC); + NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); + + // Copy all non-latch operands of a header's PHI node to the newly + // created PHI node in the preheader. + for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { + unsigned PredR = PN->getOperand(i).getReg(); + MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); + if (PredB == Latch) + continue; + + NewPN->addOperand(MachineOperand::CreateReg(PredR, false)); + NewPN->addOperand(MachineOperand::CreateMBB(PredB)); + } + + // Remove copied operands from the old PHI node and add the value + // coming from the preheader's PHI. + for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { + MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); + if (PredB != Latch) { + PN->RemoveOperand(i+1); + PN->RemoveOperand(i); + } + } + PN->addOperand(MachineOperand::CreateReg(NewPR, false)); + PN->addOperand(MachineOperand::CreateMBB(NewPH)); + } + + } else { + assert(Header->pred_size() == 2); + + // The header has only two predecessors, but the non-latch predecessor + // is not a preheader (e.g. 
it has other successors, etc.)
+  // In such a case we don't need any extra PHI nodes in the new preheader,
+  // all we need is to adjust existing PHIs in the header to now refer to
+  // the new preheader.
+    for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+         I != E && I->isPHI(); ++I) {
+      MachineInstr *PN = &*I;
+      for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
+        MachineOperand &MO = PN->getOperand(i+1);
+        if (MO.getMBB() != Latch)
+          MO.setMBB(NewPH);
+      }
+    }
+  }
+
+  // "Reroute" the CFG edges to link in the new preheader.
+  // If any of the predecessors falls through to the header, insert a branch
+  // to the new preheader in that place.
+  SmallVector<MachineOperand,1> Tmp2;
+  SmallVector<MachineOperand,1> EmptyCond;
+
+  TB = FB = 0;
+
+  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
+    MachineBasicBlock *PB = *I;
+    if (PB != Latch) {
+      Tmp2.clear();
+      bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false);
+      (void)NotAnalyzed; // suppress compiler warning
+      assert (!NotAnalyzed && "Should be analyzable!");
+      if (TB != Header && (Tmp2.empty() || FB != Header))
+        TII->InsertBranch(*PB, NewPH, 0, EmptyCond, DL);
+      PB->ReplaceUsesOfBlockWith(Header, NewPH);
+    }
+  }
+
+  // It can happen that the latch block will fall through into the header.
+  // Insert an unconditional branch to the header.
+  TB = FB = 0;
+  bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
+  (void)LatchNotAnalyzed; // suppress compiler warning
+  assert (!LatchNotAnalyzed && "Should be analyzable!");
+  if (!TB && !FB)
+    TII->InsertBranch(*Latch, Header, 0, EmptyCond, DL);
+
+  // Finally, the branch from the preheader to the header.
+  TII->InsertBranch(*NewPH, Header, 0, EmptyCond, DL);
+  NewPH->addSuccessor(Header);
+
+  return NewPH;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
new file mode 100644
index 000000000000..5ae93284269b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -0,0 +1,1684 @@
+//===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-isel"
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+static
+cl::opt<unsigned>
+MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
+  cl::Hidden, cl::init(2),
+  cl::desc("Maximum number of uses of a global address such that we still use a "
+  "constant extended instruction"));
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+  void initializeHexagonDAGToDAGISelPass(PassRegistry&);
+}
+
+//===--------------------------------------------------------------------===//
+/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class HexagonDAGToDAGISel : public SelectionDAGISel {
+  /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can
+  /// make the right decision when generating code for different targets.
+  const HexagonSubtarget &Subtarget;
+
+  // Keep a reference to HexagonTargetMachine.
+  const HexagonTargetMachine& TM;
+  DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
+public:
+  explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+                               CodeGenOpt::Level OptLevel)
+    : SelectionDAGISel(targetmachine, OptLevel),
+      Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
+      TM(targetmachine) {
+    initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+  }
+  bool hasNumUsesBelowThresGA(SDNode *N) const;
+
+  SDNode *Select(SDNode *N);
+
+  // Complex Pattern Selectors.
+  inline bool foldGlobalAddress(SDValue &N, SDValue &R);
+  inline bool foldGlobalAddressGP(SDValue &N, SDValue &R);
+  bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP);
+  bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
+  bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
+  bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
+  bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2);
+  bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset);
+  bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2);
+  bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset);
+  bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2);
+  bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
+  bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
+
+  virtual const char *getPassName() const {
+    return "Hexagon DAG->DAG Pattern Instruction Selection";
+  }
+
+  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+  /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); + bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); + + SDNode *SelectLoad(SDNode *N); + SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl); + SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl); + SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode, + SDLoc dl); + SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode, + SDLoc dl); + SDNode *SelectBaseOffsetStore(StoreSDNode *ST, SDLoc dl); + SDNode *SelectIndexedStore(StoreSDNode *ST, SDLoc dl); + SDNode *SelectStore(SDNode *N); + SDNode *SelectSHL(SDNode *N); + SDNode *SelectSelect(SDNode *N); + SDNode *SelectTruncate(SDNode *N); + SDNode *SelectMul(SDNode *N); + SDNode *SelectZeroExtend(SDNode *N); + SDNode *SelectIntrinsicWOChain(SDNode *N); + SDNode *SelectIntrinsicWChain(SDNode *N); + SDNode *SelectConstant(SDNode *N); + SDNode *SelectConstantFP(SDNode *N); + SDNode *SelectAdd(SDNode *N); + bool isConstExtProfitable(SDNode *N) const; + +// XformMskToBitPosU5Imm - Returns the bit position which +// the single bit 32 bit mask represents. +// Used in Clr and Set bit immediate memops. +SDValue XformMskToBitPosU5Imm(uint32_t Imm) { + int32_t bitPos; + bitPos = Log2_32(Imm); + assert(bitPos >= 0 && bitPos < 32 && + "Constant out of range for 32 BitPos Memops"); + return CurDAG->getTargetConstant(bitPos, MVT::i32); +} + +// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit +// mask represents. Used in Clr and Set bit immediate memops. +SDValue XformMskToBitPosU4Imm(uint16_t Imm) { + return XformMskToBitPosU5Imm(Imm); +} + +// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit +// mask represents. Used in Clr and Set bit immediate memops. +SDValue XformMskToBitPosU3Imm(uint8_t Imm) { + return XformMskToBitPosU5Imm(Imm); +} + +// Return true if there is exactly one bit set in V, i.e., if V is one of the +// following integers: 2^0, 2^1, ..., 2^31. +bool ImmIsSingleBit(uint32_t v) const { + uint32_t c = CountPopulation_64(v); + // Only return true if we counted 1 bit. + return c == 1; +} + +// XformM5ToU5Imm - Return a target constant with the specified value, of type +// i32 where the negative literal is transformed into a positive literal for +// use in -= memops. +inline SDValue XformM5ToU5Imm(signed Imm) { + assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops"); + return CurDAG->getTargetConstant( - Imm, MVT::i32); +} + + +// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range +// [1..128], used in cmpb.gtu instructions. +inline SDValue XformU7ToU7M1Imm(signed Imm) { + assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op"); + return CurDAG->getTargetConstant(Imm - 1, MVT::i8); +} + +// XformS8ToS8M1Imm - Return a target constant decremented by 1. +inline SDValue XformSToSM1Imm(signed Imm) { + return CurDAG->getTargetConstant(Imm - 1, MVT::i32); +} + +// XformU8ToU8M1Imm - Return a target constant decremented by 1. +inline SDValue XformUToUM1Imm(unsigned Imm) { + assert((Imm >= 1) && "Cannot decrement unsigned int less than 1"); + return CurDAG->getTargetConstant(Imm - 1, MVT::i32); +} + +// Include the pieces autogenerated from the target description. +#include "HexagonGenDAGISel.inc" +}; +} // end anonymous namespace + + +/// createHexagonISelDag - This pass converts a legalized DAG into a +/// Hexagon-specific DAG, ready for instruction scheduling. 
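+/// (Typical use, stated as an assumption since the pass configuration is
+/// not part of this file: the target's addInstSelector() hook calls
+/// addPass(createHexagonISelDag(TM, getOptLevel())).)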
+///
+FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
+                                         CodeGenOpt::Level OptLevel) {
+  return new HexagonDAGToDAGISel(TM, OptLevel);
+}
+
+static void initializePassOnce(PassRegistry &Registry) {
+  const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
+  PassInfo *PI = new PassInfo(Name, "hexagon-isel",
+                              &SelectionDAGISel::ID, 0, false, false);
+  Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
+  CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
+static bool IsS11_0_Offset(SDNode * S) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // s11_0 predicate - True if the immediate fits in an 11-bit sign extended
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<11>(v);
+}
+
+
+static bool IsS11_1_Offset(SDNode * S) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // s11_1 predicate - True if the immediate is a multiple of 2 and fits in
+  // an 11-bit sign extended field when shifted right by 1.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<11,1>(v);
+}
+
+
+static bool IsS11_2_Offset(SDNode * S) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // s11_2 predicate - True if the immediate is a multiple of 4 and fits in
+  // an 11-bit sign extended field when shifted right by 2.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<11,2>(v);
+}
+
+
+static bool IsS11_3_Offset(SDNode * S) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // s11_3 predicate - True if the immediate is a multiple of 8 and fits in
+  // an 11-bit sign extended field when shifted right by 3.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<11,3>(v);
+}
+
+
+static bool IsU6_0_Offset(SDNode * S) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // u6_0 predicate - True if the immediate fits in a 6-bit unsigned field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<6>(v);
+}
+
+
+static bool IsU6_1_Offset(SDNode * S) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // u6_1 predicate - True if the immediate is a multiple of 2 and fits in
+  // a 6-bit unsigned field when shifted right by 1.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<6,1>(v);
+}
+
+
+static bool IsU6_2_Offset(SDNode * S) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // u6_2 predicate - True if the immediate is a multiple of 4 and fits in
+  // a 6-bit unsigned field when shifted right by 2.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<6,2>(v);
+}
+
+
+// Intrinsics that return a predicate.
+static unsigned doesIntrinsicReturnPredicate(unsigned ID) +{ + switch (ID) { + default: + return 0; + case Intrinsic::hexagon_C2_cmpeq: + case Intrinsic::hexagon_C2_cmpgt: + case Intrinsic::hexagon_C2_cmpgtu: + case Intrinsic::hexagon_C2_cmpgtup: + case Intrinsic::hexagon_C2_cmpgtp: + case Intrinsic::hexagon_C2_cmpeqp: + case Intrinsic::hexagon_C2_bitsset: + case Intrinsic::hexagon_C2_bitsclr: + case Intrinsic::hexagon_C2_cmpeqi: + case Intrinsic::hexagon_C2_cmpgti: + case Intrinsic::hexagon_C2_cmpgtui: + case Intrinsic::hexagon_C2_cmpgei: + case Intrinsic::hexagon_C2_cmpgeui: + case Intrinsic::hexagon_C2_cmplt: + case Intrinsic::hexagon_C2_cmpltu: + case Intrinsic::hexagon_C2_bitsclri: + case Intrinsic::hexagon_C2_and: + case Intrinsic::hexagon_C2_or: + case Intrinsic::hexagon_C2_xor: + case Intrinsic::hexagon_C2_andn: + case Intrinsic::hexagon_C2_not: + case Intrinsic::hexagon_C2_orn: + case Intrinsic::hexagon_C2_pxfer_map: + case Intrinsic::hexagon_C2_any8: + case Intrinsic::hexagon_C2_all8: + case Intrinsic::hexagon_A2_vcmpbeq: + case Intrinsic::hexagon_A2_vcmpbgtu: + case Intrinsic::hexagon_A2_vcmpheq: + case Intrinsic::hexagon_A2_vcmphgt: + case Intrinsic::hexagon_A2_vcmphgtu: + case Intrinsic::hexagon_A2_vcmpweq: + case Intrinsic::hexagon_A2_vcmpwgt: + case Intrinsic::hexagon_A2_vcmpwgtu: + case Intrinsic::hexagon_C2_tfrrp: + case Intrinsic::hexagon_S2_tstbit_i: + case Intrinsic::hexagon_S2_tstbit_r: + return 1; + } +} + + +// Intrinsics that have predicate operands. +static unsigned doesIntrinsicContainPredicate(unsigned ID) +{ + switch (ID) { + default: + return 0; + case Intrinsic::hexagon_C2_tfrpr: + return Hexagon::TFR_RsPd; + case Intrinsic::hexagon_C2_and: + return Hexagon::AND_pp; + case Intrinsic::hexagon_C2_xor: + return Hexagon::XOR_pp; + case Intrinsic::hexagon_C2_or: + return Hexagon::OR_pp; + case Intrinsic::hexagon_C2_not: + return Hexagon::NOT_p; + case Intrinsic::hexagon_C2_any8: + return Hexagon::ANY_pp; + case Intrinsic::hexagon_C2_all8: + return Hexagon::ALL_pp; + case Intrinsic::hexagon_C2_vitpack: + return Hexagon::VITPACK_pp; + case Intrinsic::hexagon_C2_mask: + return Hexagon::MASK_p; + case Intrinsic::hexagon_C2_mux: + return Hexagon::MUX_rr; + + // Mapping hexagon_C2_muxir to MUX_pri. This is pretty weird - but + // that's how it's mapped in q6protos.h. + case Intrinsic::hexagon_C2_muxir: + return Hexagon::MUX_ri; + + // Mapping hexagon_C2_muxri to MUX_pir. This is pretty weird - but + // that's how it's mapped in q6protos.h. + case Intrinsic::hexagon_C2_muxri: + return Hexagon::MUX_ir; + + case Intrinsic::hexagon_C2_muxii: + return Hexagon::MUX_ii; + case Intrinsic::hexagon_C2_vmux: + return Hexagon::VMUX_prr64; + case Intrinsic::hexagon_S2_valignrb: + return Hexagon::VALIGN_rrp; + case Intrinsic::hexagon_S2_vsplicerb: + return Hexagon::VSPLICE_rrp; + } +} + + +static bool OffsetFitsS11(EVT MemType, int64_t Offset) { + if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) { + return true; + } + if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) { + return true; + } + if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) { + return true; + } + if (MemType == MVT::i8 && isInt<11>(Offset)) { + return true; + } + return false; +} + + +// +// Try to lower loads of GlobalAdresses into base+offset loads. Custom +// lowering for GlobalAddress nodes has already turned it into a +// CONST32. 
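+// For instance (an illustrative sketch; "g" is a hypothetical global):
+// a 32-bit load from g+8 becomes
+//   Rb = CONST32_set #g
+//   Rv = LDriw_indexed Rb, #8
+// provided the offset is non-zero and fits the S11 constraint checked
+// below.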
+// +SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl) { + SDValue Chain = LD->getChain(); + SDNode* Const32 = LD->getBasePtr().getNode(); + unsigned Opcode = 0; + + if (Const32->getOpcode() == HexagonISD::CONST32 && + ISD::isNormalLoad(LD)) { + SDValue Base = Const32->getOperand(0); + EVT LoadedVT = LD->getMemoryVT(); + int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset(); + if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) { + MVT PointerTy = getTargetLowering()->getPointerTy(); + const GlobalValue* GV = + cast<GlobalAddressSDNode>(Base)->getGlobal(); + SDValue TargAddr = + CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0); + SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set, + dl, PointerTy, + TargAddr); + // Figure out base + offset opcode + if (LoadedVT == MVT::i64) Opcode = Hexagon::LDrid_indexed; + else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed; + else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed; + else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed; + else llvm_unreachable("unknown memory type"); + + // Build indexed load. + SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy); + SDNode* Result = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::Other, + SDValue(NewBase,0), + TargetConstOff, + Chain); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + ReplaceUses(LD, Result); + return Result; + } + } + + return SelectCode(LD); +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, + unsigned Opcode, + SDLoc dl) +{ + SDValue Chain = LD->getChain(); + EVT LoadedVT = LD->getMemoryVT(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + SDValue N1 = LD->getOperand(1); + SDValue CPTmpN1_0; + SDValue CPTmpN1_1; + + if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && + N1.getNode()->getValueType(0) == MVT::i32) { + const HexagonInstrInfo *TII = + static_cast<const HexagonInstrInfo*>(TM.getInstrInfo()); + if (TII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, + MVT::Other, Base, TargetConst, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, MVT::i64, + SDValue(Result_1, 0)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) + }; + ReplaceUses(Froms, Tos, 3); + return Result_2; + } + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, Base, TargetConst0, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, + MVT::i64, SDValue(Result_1, 0)); + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, + MVT::i32, Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + 
cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_3, 0), + SDValue(Result_1, 1) + }; + ReplaceUses(Froms, Tos, 3); + return Result_2; + } + return SelectCode(LD); +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD, + unsigned Opcode, + SDLoc dl) +{ + SDValue Chain = LD->getChain(); + EVT LoadedVT = LD->getMemoryVT(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + SDValue N1 = LD->getOperand(1); + SDValue CPTmpN1_0; + SDValue CPTmpN1_1; + + if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && + N1.getNode()->getValueType(0) == MVT::i32) { + const HexagonInstrInfo *TII = + static_cast<const HexagonInstrInfo*>(TM.getInstrInfo()); + if (TII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32, + TargetConst0); + SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl, + MVT::i64, MVT::Other, + SDValue(Result_2,0), + SDValue(Result_1,0)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_3, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) + }; + ReplaceUses(Froms, Tos, 3); + return Result_3; + } + + // Generate an indirect load. + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, + Base, TargetConst0, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32, + TargetConst0); + SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl, + MVT::i64, MVT::Other, + SDValue(Result_2,0), + SDValue(Result_1,0)); + // Add offset to base. + SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_3, 0), // Load value. + SDValue(Result_4, 0), // New address. + SDValue(Result_1, 1) + }; + ReplaceUses(Froms, Tos, 3); + return Result_3; + } + + return SelectCode(LD); +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + // Get the constant value. + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + EVT LoadedVT = LD->getMemoryVT(); + unsigned Opcode = 0; + + // Check for zero ext loads. + bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD); + + // Figure out the opcode. 
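+  // (Summary of the selection below: i64 -> LDrid, i32 -> LDriw,
+  // i16 -> LDrih/LDriuh, i8 -> LDrib/LDriub, with the POST_ variants
+  // used whenever the auto-increment immediate is valid.)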
+ const HexagonInstrInfo *TII = + static_cast<const HexagonInstrInfo*>(TM.getInstrInfo()); + if (LoadedVT == MVT::i64) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = Hexagon::POST_LDrid; + else + Opcode = Hexagon::LDrid; + } else if (LoadedVT == MVT::i32) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = Hexagon::POST_LDriw; + else + Opcode = Hexagon::LDriw; + } else if (LoadedVT == MVT::i16) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = zextval ? Hexagon::POST_LDriuh : Hexagon::POST_LDrih; + else + Opcode = zextval ? Hexagon::LDriuh : Hexagon::LDrih; + } else if (LoadedVT == MVT::i8) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = zextval ? Hexagon::POST_LDriub : Hexagon::POST_LDrib; + else + Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib; + } else + llvm_unreachable("unknown memory type"); + + // For zero ext i64 loads, we need to add combine instructions. + if (LD->getValueType(0) == MVT::i64 && + LD->getExtensionType() == ISD::ZEXTLOAD) { + return SelectIndexedLoadZeroExtend64(LD, Opcode, dl); + } + if (LD->getValueType(0) == MVT::i64 && + LD->getExtensionType() == ISD::SEXTLOAD) { + // Handle sign ext i64 loads. + return SelectIndexedLoadSignExtend64(LD, Opcode, dl); + } + if (TII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode* Result = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Result, 1), + SDValue(Result, 2) + }; + ReplaceUses(Froms, Tos, 3); + return Result; + } else { + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::Other, Base, TargetConst0, + Chain); + SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_1, 0), + SDValue(Result_2, 0), + SDValue(Result_1, 1) + }; + ReplaceUses(Froms, Tos, 3); + return Result_1; + } +} + + +SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) { + SDNode *result; + SDLoc dl(N); + LoadSDNode *LD = cast<LoadSDNode>(N); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + + // Handle indexed loads. + if (AM != ISD::UNINDEXED) { + result = SelectIndexedLoad(LD, dl); + } else { + result = SelectBaseOffsetLoad(LD, dl); + } + + return result; +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { + SDValue Chain = ST->getChain(); + SDValue Base = ST->getBasePtr(); + SDValue Offset = ST->getOffset(); + SDValue Value = ST->getValue(); + SDNode *OffsetNode = Offset.getNode(); + // Get the constant value. + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + EVT StoredVT = ST->getMemoryVT(); + + // Offset value must be within representable range + // and must have correct alignment properties. 
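+  // When the increment is valid this becomes a post-increment store,
+  // e.g. (sketch of the resulting assembly, names illustrative)
+  //   memw(Rbase++#4) = Rval
+  // for an i32 store with Val == 4; otherwise a plain store plus a
+  // separate ADD_ri is emitted below.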
+  const HexagonInstrInfo *TII =
+    static_cast<const HexagonInstrInfo*>(TM.getInstrInfo());
+  if (TII->isValidAutoIncImm(StoredVT, Val)) {
+    SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
+                     Chain};
+    unsigned Opcode = 0;
+
+    // Figure out the post inc version of opcode.
+    if (StoredVT == MVT::i64) Opcode = Hexagon::POST_STdri;
+    else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri;
+    else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri;
+    else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri;
+    else llvm_unreachable("unknown memory type");
+
+    // Build post increment store.
+    SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+                                            MVT::Other, Ops);
+    MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+    MemOp[0] = ST->getMemOperand();
+    cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+
+    ReplaceUses(ST, Result);
+    ReplaceUses(SDValue(ST,1), SDValue(Result,1));
+    return Result;
+  }
+
+  // Note: Order of operands matches the def of instruction:
+  // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ...
+  // and it differs for POST_ST* for instance.
+  SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value,
+                    Chain};
+  unsigned Opcode = 0;
+
+  // Figure out the opcode.
+  if (StoredVT == MVT::i64) Opcode = Hexagon::STrid;
+  else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
+  else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih;
+  else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib;
+  else llvm_unreachable("unknown memory type");
+
+  // Build regular store.
+  SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+  SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+  // Build the split increment instruction.
+  SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+                                            Base,
+                                            TargetConstVal,
+                                            SDValue(Result_1, 0));
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = ST->getMemOperand();
+  cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+
+  ReplaceUses(SDValue(ST,0), SDValue(Result_2,0));
+  ReplaceUses(SDValue(ST,1), SDValue(Result_1,0));
+  return Result_2;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
+                                                   SDLoc dl) {
+  SDValue Chain = ST->getChain();
+  SDNode* Const32 = ST->getBasePtr().getNode();
+  SDValue Value = ST->getValue();
+  unsigned Opcode = 0;
+
+  // Try to lower stores of GlobalAddresses into indexed stores. Custom
+  // lowering for GlobalAddress nodes has already turned it into a
+  // CONST32. Avoid truncating stores for the moment. Post-inc stores
+  // do the same. There is no obvious reason for this, so a bug should
+  // be filed to fix it.
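+  // The rewrite attempted below is roughly:
+  //   store value, CONST32(@global + #offset)
+  //     ==>
+  //   Rb = CONST32_set(@global)
+  //   STri*_indexed(Rb, #offset, value)
+  // with the offset folded into the indexed store only when it is
+  // non-zero and fits a signed 11-bit field scaled for the access size
+  // (OffsetFitsS11).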
+ if ((Const32->getOpcode() == HexagonISD::CONST32) && + !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) { + SDValue Base = Const32->getOperand(0); + if (Base.getOpcode() == ISD::TargetGlobalAddress) { + EVT StoredVT = ST->getMemoryVT(); + int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset(); + if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) { + MVT PointerTy = getTargetLowering()->getPointerTy(); + const GlobalValue* GV = + cast<GlobalAddressSDNode>(Base)->getGlobal(); + SDValue TargAddr = + CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0); + SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set, + dl, PointerTy, + TargAddr); + + // Figure out base + offset opcode + if (StoredVT == MVT::i64) Opcode = Hexagon::STrid_indexed; + else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed; + else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed; + else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed; + else llvm_unreachable("unknown memory type"); + + SDValue Ops[] = {SDValue(NewBase,0), + CurDAG->getTargetConstant(Offset,PointerTy), + Value, Chain}; + // build indexed store + SDNode* Result = CurDAG->getMachineNode(Opcode, dl, + MVT::Other, Ops); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = ST->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + ReplaceUses(ST, Result); + return Result; + } + } + } + + return SelectCode(ST); +} + + +SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) { + SDLoc dl(N); + StoreSDNode *ST = cast<StoreSDNode>(N); + ISD::MemIndexedMode AM = ST->getAddressingMode(); + + // Handle indexed stores. + if (AM != ISD::UNINDEXED) { + return SelectIndexedStore(ST, dl); + } + + return SelectBaseOffsetStore(ST, dl); +} + +SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { + SDLoc dl(N); + + // + // %conv.i = sext i32 %tmp1 to i64 + // %conv2.i = sext i32 %add to i64 + // %mul.i = mul nsw i64 %conv2.i, %conv.i + // + // --- match with the following --- + // + // %mul.i = mpy (%tmp1, %add) + // + + if (N->getValueType(0) == MVT::i64) { + // Shifting a i64 signed multiply. + SDValue MulOp0 = N->getOperand(0); + SDValue MulOp1 = N->getOperand(1); + + SDValue OP0; + SDValue OP1; + + // Handle sign_extend and sextload. + if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext0 = MulOp0.getOperand(0); + if (Sext0.getNode()->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + + OP0 = Sext0; + } else if (MulOp0.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + return SelectCode(N); + } + + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), TargetConst0, + Chain), 0); + } else { + return SelectCode(N); + } + + // Same goes for the second operand. 
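+    // As with the first operand, the explicit sign extension is dropped:
+    // MPY64 consumes the 32-bit sources directly and produces the full
+    // 64-bit signed product, so sext(a) * sext(b) is exactly mpy(a, b).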
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext1 = MulOp1.getOperand(0); + if (Sext1.getNode()->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + + OP1 = Sext1; + } else if (MulOp1.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + return SelectCode(N); + } + + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), TargetConst0, + Chain), 0); + } else { + return SelectCode(N); + } + + // Generate a mpy instruction. + SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY64, dl, MVT::i64, + OP0, OP1); + ReplaceUses(N, Result); + return Result; + } + + return SelectCode(N); +} + + +SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) { + SDLoc dl(N); + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() == ISD::SETCC) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) { + SDValue N000 = N00.getOperand(0); + SDValue N001 = N00.getOperand(1); + if (cast<VTSDNode>(N001)->getVT() == MVT::i16) { + SDValue N01 = N0.getOperand(1); + SDValue N02 = N0.getOperand(2); + + // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2, + // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1, + // IntRegs:i32:$src2) + // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2) + // Pattern complexity = 9 cost = 1 size = 0. + if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) { + SDValue N1 = N->getOperand(1); + if (N01 == N1) { + SDValue N2 = N->getOperand(2); + if (N000 == N2 && + N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && + N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { + SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl, + MVT::i32, N000); + SDNode *Result = CurDAG->getMachineNode(Hexagon::MAXw_rr, dl, + MVT::i32, + SDValue(SextNode, 0), + N1); + ReplaceUses(N, Result); + return Result; + } + } + } + + // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2, + // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1, + // IntRegs:i32:$src2) + // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2) + // Pattern complexity = 9 cost = 1 size = 0. + if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) { + SDValue N1 = N->getOperand(1); + if (N01 == N1) { + SDValue N2 = N->getOperand(2); + if (N000 == N2 && + N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && + N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { + SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl, + MVT::i32, N000); + SDNode *Result = CurDAG->getMachineNode(Hexagon::MINw_rr, dl, + MVT::i32, + SDValue(SextNode, 0), + N1); + ReplaceUses(N, Result); + return Result; + } + } + } + } + } + } + + return SelectCode(N); +} + + +SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) { + SDLoc dl(N); + SDValue Shift = N->getOperand(0); + + // + // %conv.i = sext i32 %tmp1 to i64 + // %conv2.i = sext i32 %add to i64 + // %mul.i = mul nsw i64 %conv2.i, %conv.i + // %shr5.i = lshr i64 %mul.i, 32 + // %conv3.i = trunc i64 %shr5.i to i32 + // + // --- match with the following --- + // + // %conv3.i = mpy (%tmp1, %add) + // + // Trunc to i32. + if (N->getValueType(0) == MVT::i32) { + // Trunc from i64. + if (Shift.getNode()->getValueType(0) == MVT::i64) { + // Trunc child is logical shift right. 
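+      // The full shape being matched is trunc(srl(mul(sext(a), sext(b)),
+      // 32)), i.e. the high 32 bits of the 64-bit signed product, which
+      // is precisely what a single MPY returns.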
+ if (Shift.getOpcode() != ISD::SRL) { + return SelectCode(N); + } + + SDValue ShiftOp0 = Shift.getOperand(0); + SDValue ShiftOp1 = Shift.getOperand(1); + + // Shift by const 32 + if (ShiftOp1.getOpcode() != ISD::Constant) { + return SelectCode(N); + } + + int32_t ShiftConst = + cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue(); + if (ShiftConst != 32) { + return SelectCode(N); + } + + // Shifting a i64 signed multiply + SDValue Mul = ShiftOp0; + if (Mul.getOpcode() != ISD::MUL) { + return SelectCode(N); + } + + SDValue MulOp0 = Mul.getOperand(0); + SDValue MulOp1 = Mul.getOperand(1); + + SDValue OP0; + SDValue OP1; + + // Handle sign_extend and sextload + if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext0 = MulOp0.getOperand(0); + if (Sext0.getNode()->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + + OP0 = Sext0; + } else if (MulOp0.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + return SelectCode(N); + } + + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), + TargetConst0, Chain), 0); + } else { + return SelectCode(N); + } + + // Same goes for the second operand. + if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext1 = MulOp1.getOperand(0); + if (Sext1.getNode()->getValueType(0) != MVT::i32) + return SelectCode(N); + + OP1 = Sext1; + } else if (MulOp1.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + return SelectCode(N); + } + + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), + TargetConst0, Chain), 0); + } else { + return SelectCode(N); + } + + // Generate a mpy instruction. + SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY, dl, MVT::i32, + OP0, OP1); + ReplaceUses(N, Result); + return Result; + } + } + + return SelectCode(N); +} + + +SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { + SDLoc dl(N); + if (N->getValueType(0) == MVT::i32) { + SDValue Shl_0 = N->getOperand(0); + SDValue Shl_1 = N->getOperand(1); + // RHS is const. + if (Shl_1.getOpcode() == ISD::Constant) { + if (Shl_0.getOpcode() == ISD::MUL) { + SDValue Mul_0 = Shl_0.getOperand(0); // Val + SDValue Mul_1 = Shl_0.getOperand(1); // Const + // RHS of mul is const. 
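+        // This relies on the identity (x * c) << s == x * (c << s): the
+        // two constants are pre-folded so the whole tree becomes a single
+        // mpyi(x, #imm), provided the folded constant still fits the
+        // signed 9-bit immediate, e.g. (x * 3) << 2 becomes mpyi(x, #12).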
+        if (Mul_1.getOpcode() == ISD::Constant) {
+          int32_t ShlConst =
+            cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+          int32_t MulConst =
+            cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
+          int32_t ValConst = MulConst << ShlConst;
+          SDValue Val = CurDAG->getTargetConstant(ValConst,
+                                                  MVT::i32);
+          if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
+            if (isInt<9>(CN->getSExtValue())) {
+              SDNode* Result =
+                CurDAG->getMachineNode(Hexagon::MPYI_ri, dl,
+                                       MVT::i32, Mul_0, Val);
+              ReplaceUses(N, Result);
+              return Result;
+            }
+
+        }
+      } else if (Shl_0.getOpcode() == ISD::SUB) {
+        SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
+        SDValue Sub_1 = Shl_0.getOperand(1); // Val
+        if (Sub_0.getOpcode() == ISD::Constant) {
+          int32_t SubConst =
+            cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue();
+          if (SubConst == 0) {
+            if (Sub_1.getOpcode() == ISD::SHL) {
+              SDValue Shl2_0 = Sub_1.getOperand(0); // Val
+              SDValue Shl2_1 = Sub_1.getOperand(1); // Const
+              if (Shl2_1.getOpcode() == ISD::Constant) {
+                int32_t ShlConst =
+                  cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+                int32_t Shl2Const =
+                  cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
+                int32_t ValConst = 1 << (ShlConst+Shl2Const);
+                SDValue Val = CurDAG->getTargetConstant(-ValConst, MVT::i32);
+                if (ConstantSDNode *CN =
+                    dyn_cast<ConstantSDNode>(Val.getNode()))
+                  if (isInt<9>(CN->getSExtValue())) {
+                    SDNode* Result =
+                      CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, MVT::i32,
+                                             Shl2_0, Val);
+                    ReplaceUses(N, Result);
+                    return Result;
+                  }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return SelectCode(N);
+}
+
+
+//
+// If a zero_extend is fed by an intrinsic in the DAG (meaning that the
+// result of the intrinsic is a predicate), convert the zero_extend into a
+// transfer instruction.
+//
+// The zero_extend -> transfer lowering happens here. Otherwise, the
+// zero_extend would be converted into a MUX, because predicate registers
+// are defined as 1 bit wide in the compiler while the architecture defines
+// them as 8-bit registers. We want to preserve all of the lower 8 bits,
+// not just the LSB.
+//
+SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
+  SDLoc dl(N);
+  SDNode *IsIntrinsic = N->getOperand(0).getNode();
+  if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
+    unsigned ID =
+      cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue();
+    if (doesIntrinsicReturnPredicate(ID)) {
+      // Now we need to differentiate target data types.
+      if (N->getValueType(0) == MVT::i64) {
+        // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs).
+        SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+        SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+                                                  MVT::i32,
+                                                  SDValue(IsIntrinsic, 0));
+        SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl,
+                                                  MVT::i32,
+                                                  TargetConst0);
+        SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+                                                  MVT::i64, MVT::Other,
+                                                  SDValue(Result_2, 0),
+                                                  SDValue(Result_1, 0));
+        ReplaceUses(N, Result_3);
+        return Result_3;
+      }
+      if (N->getValueType(0) == MVT::i32) {
+        // Convert the zero_extend to Rs = Pd
+        SDNode* RsPd = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+                                              MVT::i32,
+                                              SDValue(IsIntrinsic, 0));
+        ReplaceUses(N, RsPd);
+        return RsPd;
+      }
+      llvm_unreachable("Unexpected value type");
+    }
+  }
+  return SelectCode(N);
+}
+
+
+//
+// Check for intrinsics that have predicate registers as operand(s),
+// and lower them to the actual intrinsic.
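+// Register-class operands are passed through unchanged, predicate
+// operands are first moved into predicate registers with TFR_PdRs, and
+// constant operands are lowered to target immediates before the machine
+// node is built.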
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
+  SDLoc dl(N);
+  unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+  unsigned IntrinsicWithPred = doesIntrinsicContainPredicate(ID);
+
+  // We are concerned with only those intrinsics that have predicate registers
+  // as at least one of the operands.
+  if (IntrinsicWithPred) {
+    SmallVector<SDValue, 8> Ops;
+    const HexagonInstrInfo *TII =
+      static_cast<const HexagonInstrInfo*>(TM.getInstrInfo());
+    const MCInstrDesc &MCID = TII->get(IntrinsicWithPred);
+    const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+    // Iterate over all the operands of the intrinsic.
+    // For PredRegs, do the transfer.
+    // For Double/Int Regs, just preserve the value.
+    // For immediates, lower them.
+    for (unsigned i = 1; i < N->getNumOperands(); ++i) {
+      SDNode *Arg = N->getOperand(i).getNode();
+      const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI, *MF);
+
+      if (RC == &Hexagon::IntRegsRegClass ||
+          RC == &Hexagon::DoubleRegsRegClass) {
+        Ops.push_back(SDValue(Arg, 0));
+      } else if (RC == &Hexagon::PredRegsRegClass) {
+        // Do the transfer.
+        SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+                                              SDValue(Arg, 0));
+        Ops.push_back(SDValue(PdRs,0));
+      } else if (RC == NULL && (dyn_cast<ConstantSDNode>(Arg) != NULL)) {
+        // This is an immediate operand. Lower it here, making sure that we
+        // DO have a ConstantSDNode for the immediate value.
+        int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue();
+        SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32);
+        Ops.push_back(SDVal);
+      } else {
+        llvm_unreachable("Unimplemented");
+      }
+    }
+    EVT ReturnValueVT = N->getValueType(0);
+    SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl,
+                                            ReturnValueVT, Ops);
+    ReplaceUses(N, Result);
+    return Result;
+  }
+  return SelectCode(N);
+}
+
+//
+// Map floating point constant values.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
+  SDLoc dl(N);
+  ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+  APFloat APF = CN->getValueAPF();
+  if (N->getValueType(0) == MVT::f32) {
+    return CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32,
+              CurDAG->getTargetConstantFP(APF.convertToFloat(), MVT::f32));
+  }
+  else if (N->getValueType(0) == MVT::f64) {
+    return CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64,
+              CurDAG->getTargetConstantFP(APF.convertToDouble(), MVT::f64));
+  }
+
+  return SelectCode(N);
+}
+
+
+//
+// Map predicate true (encoded as -1 in LLVM) to an XOR.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
+  SDLoc dl(N);
+  if (N->getValueType(0) == MVT::i1) {
+    SDNode* Result;
+    int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+    if (Val == -1) {
+      // Create the IntReg = 0 node.
+      SDNode* IntRegTFR =
+        CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+                               CurDAG->getTargetConstant(0, MVT::i32));
+
+      // Pd = IntReg
+      SDNode* Pd = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+                                          SDValue(IntRegTFR, 0));
+
+      // not(Pd)
+      SDNode* NotPd = CurDAG->getMachineNode(Hexagon::NOT_p, dl, MVT::i1,
+                                             SDValue(Pd, 0));
+
+      // xor(Pd, not(Pd))
+      Result = CurDAG->getMachineNode(Hexagon::XOR_pp, dl, MVT::i1,
+                                      SDValue(Pd, 0), SDValue(NotPd, 0));
+
+      // We have just built:
+      //   Rs = #0
+      //   Pd = Rs
+      //   Pd = xor(Pd, not(Pd))   // i.e. all ones, predicate "true"
+
+      ReplaceUses(N, Result);
+      return Result;
+    }
+  }
+
+  return SelectCode(N);
+}
+
+
+//
+// Map an add followed by an asr -> asr +=.
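+// That is, add(x, sra(y, z)) becomes the accumulating form
+// Rx += asr(Ry, Rz) (ASR_ADD_rr); this is done only when the shift
+// result has no other use.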
+// +SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) { + SDLoc dl(N); + if (N->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + // Identify nodes of the form: add(asr(...)). + SDNode* Src1 = N->getOperand(0).getNode(); + if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse() + || Src1->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + + // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that + // Rd and Rd' are assigned to the same register + SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_ADD_rr, dl, MVT::i32, + N->getOperand(1), + Src1->getOperand(0), + Src1->getOperand(1)); + ReplaceUses(N, Result); + + return Result; +} + + +SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { + if (N->isMachineOpcode()) { + N->setNodeId(-1); + return NULL; // Already selected. + } + + + switch (N->getOpcode()) { + case ISD::Constant: + return SelectConstant(N); + + case ISD::ConstantFP: + return SelectConstantFP(N); + + case ISD::ADD: + return SelectAdd(N); + + case ISD::SHL: + return SelectSHL(N); + + case ISD::LOAD: + return SelectLoad(N); + + case ISD::STORE: + return SelectStore(N); + + case ISD::SELECT: + return SelectSelect(N); + + case ISD::TRUNCATE: + return SelectTruncate(N); + + case ISD::MUL: + return SelectMul(N); + + case ISD::ZERO_EXTEND: + return SelectZeroExtend(N); + + case ISD::INTRINSIC_WO_CHAIN: + return SelectIntrinsicWOChain(N); + } + + return SelectCode(N); +} + + +// +// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way +// to define these instructions. +// +bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + + +bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_0_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_0_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_1_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_1_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. 
+ + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_2_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_2_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_0_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_0_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_1_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_1_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_2_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_2_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + + if (Addr.getOpcode() != ISD::ADD) { + return(SelectADDRriS11_2(Addr, Base, Offset)); + } + + return SelectADDRriS11_2(Addr, Base, Offset); +} + + +bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_3_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_3_Offset(Offset.getNode())); +} + +bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, + SDValue &R2) { + if (Addr.getOpcode() == ISD::FrameIndex) return false; + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (Addr.getOpcode() == ISD::ADD) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) + if (isInt<13>(CN->getSExtValue())) + return false; // Let the reg+imm pattern catch this! + R1 = Addr.getOperand(0); + R2 = Addr.getOperand(1); + return true; + } + + R1 = Addr; + + return true; +} + + +// Handle generic address case. 
It is accessed from inlined asm =m constraints, +// which could have any kind of pointer. +bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr, + SDValue &Base, SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (Addr.getOpcode() == ISD::ADD) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + + +bool HexagonDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + std::vector<SDValue> &OutOps) { + SDValue Op0, Op1; + + switch (ConstraintCode) { + case 'o': // Offsetable. + case 'v': // Not offsetable. + default: return true; + case 'm': // Memory. + if (!SelectAddr(Op.getNode(), Op, Op0, Op1)) + return true; + break; + } + + OutOps.push_back(Op0); + OutOps.push_back(Op1); + return false; +} + +bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const { + unsigned UseCount = 0; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + UseCount++; + } + + return (UseCount <= 1); + +} + +//===--------------------------------------------------------------------===// +// Return 'true' if use count of the global address is below threshold. +//===--------------------------------------------------------------------===// +bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const { + assert(N->getOpcode() == ISD::TargetGlobalAddress && + "Expecting a target global address"); + + // Always try to fold the address. + if (TM.getOptLevel() == CodeGenOpt::Aggressive) + return true; + + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); + DenseMap<const GlobalValue *, unsigned>::const_iterator GI = + GlobalAddressUseCountMap.find(GA->getGlobal()); + + if (GI == GlobalAddressUseCountMap.end()) + return false; + + return GI->second <= MaxNumOfUsesForConstExtenders; +} + +//===--------------------------------------------------------------------===// +// Return true if the non GP-relative global address can be folded. +//===--------------------------------------------------------------------===// +inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) { + return foldGlobalAddressImpl(N, R, false); +} + +//===--------------------------------------------------------------------===// +// Return true if the GP-relative global address can be folded. +//===--------------------------------------------------------------------===// +inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) { + return foldGlobalAddressImpl(N, R, true); +} + +//===--------------------------------------------------------------------===// +// Fold offset of the global address if number of uses are below threshold. 
+//===--------------------------------------------------------------------===// +bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R, + bool ShouldLookForGP) { + if (N.getOpcode() == ISD::ADD) { + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) || + (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1); + GlobalAddressSDNode *GA = + dyn_cast<GlobalAddressSDNode>(N0.getOperand(0)); + + if (Const && GA && + (GA->getOpcode() == ISD::TargetGlobalAddress)) { + if ((N0.getOpcode() == HexagonISD::CONST32) && + !hasNumUsesBelowThresGA(GA)) + return false; + R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), + SDLoc(Const), + N.getValueType(), + GA->getOffset() + + (uint64_t)Const->getSExtValue()); + return true; + } + } + } + return false; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp new file mode 100644 index 000000000000..137417966976 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -0,0 +1,1725 @@ +//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interfaces that Hexagon uses to lower LLVM code +// into a selection DAG. +// +//===----------------------------------------------------------------------===// + +#include "HexagonISelLowering.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonTargetObjectFile.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> +EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden, + cl::desc("Control jump table emission on Hexagon target")); + +namespace { +class HexagonCCState : public CCState { + int NumNamedVarArgParams; + +public: + HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, + const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs, + LLVMContext &C, int NumNamedVarArgParams) + : CCState(CC, isVarArg, MF, TM, locs, C), + NumNamedVarArgParams(NumNamedVarArgParams) {} + + int getNumNamedVarArgParams() const { return NumNamedVarArgParams; } +}; +} + +// Implement calling convention for Hexagon. 
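+//
+// In outline, as implemented below: 32-bit values are passed in R0..R5
+// and 64-bit values in the overlapping pairs D0..D2 (R1:0, R3:2, R5:4);
+// whatever does not get a register goes on the stack with 4- or 8-byte
+// alignment. Small integer types are promoted to i32 first, and by-value
+// aggregates (which must be larger than 8 bytes here) are always passed
+// on the stack. Return values use R0 (32-bit) or D0 (64-bit).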
+static bool +CC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + HexagonCCState &HState = static_cast<HexagonCCState &>(State); + + // NumNamedVarArgParams can not be zero for a VarArg function. + assert((HState.getNumNamedVarArgParams() > 0) && + "NumNamedVarArgParams is not bigger than zero."); + + if ((int)ValNo < HState.getNumNamedVarArgParams()) { + // Deal with named arguments. + return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State); + } + + // Deal with un-named arguments. + unsigned ofst; + if (ArgFlags.isByVal()) { + // If pass-by-value, the size allocated on stack is decided + // by ArgFlags.getByValSize(), not by the size of LocVT. + assert ((ArgFlags.getByValSize() > 8) && + "ByValSize must be bigger than 8 bytes"); + ofst = State.AllocateStack(ArgFlags.getByValSize(), 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + ofst = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + ofst = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + llvm_unreachable(0); +} + + +static bool +CC_Hexagon (unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (ArgFlags.isByVal()) { + // Passed on stack. + assert ((ArgFlags.getByValSize() > 8) && + "ByValSize must be bigger than 8 bytes"); + unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + + if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + return true; // CC didn't match. 
+} + + +static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + static const uint16_t RegList[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5 + }; + if (unsigned Reg = State.AllocateReg(RegList, 6)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + unsigned Offset = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + static const uint16_t RegList1[] = { + Hexagon::D1, Hexagon::D2 + }; + static const uint16_t RegList2[] = { + Hexagon::R1, Hexagon::R3 + }; + if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + + if (LocVT == MVT::i1 || + LocVT == MVT::i8 || + LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + return true; // CC didn't match. +} + +static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + if (unsigned Reg = State.AllocateReg(Hexagon::R0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + unsigned Offset = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + unsigned Offset = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +SDValue +HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) +const { + return SDValue(); +} + +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" of size "Size". Alignment information is +/// specified by the specific parameter attribute. The copy will be passed as +/// a byval function parameter. Sometimes what we are copying is the end of a +/// larger object, the part that does not fit in registers. 
+static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + SDLoc dl) { + + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(), MachinePointerInfo()); +} + + +// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is +// passed by value, the function prototype is modified to return void and +// the value is stored in memory pointed by a pointer passed by caller. +SDValue +HexagonTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc dl, SelectionDAG &DAG) const { + + // CCValAssign - represent the assignment of the return value to locations. + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + // Analyze return values of ISD::RET + CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon); + + SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); + + // Guarantee that all emitted copies are stuck together with flags. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, + &RetOps[0], RetOps.size()); +} + + + + +/// LowerCallResult - Lower the result values of an ISD::CALL into the +/// appropriate copies out of appropriate physical registers. This assumes that +/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +/// being lowered. Returns a SDNode with the same number of values as the +/// ISD::CALL. +SDValue +HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const + SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + const SmallVectorImpl<SDValue> &OutVals, + SDValue Callee) const { + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + Chain = DAG.getCopyFromReg(Chain, dl, + RVLocs[i].getLocReg(), + RVLocs[i].getValVT(), InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + +/// LowerCall - Functions arguments are copied from virtual regs to +/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. 
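+/// The sequence built here is, roughly: CALLSEQ_START; stores and
+/// copy-to-reg nodes for the outgoing arguments; the HexagonISD::CALL
+/// node and CALLSEQ_END (a tail call instead ends in a single
+/// HexagonISD::TC_RETURN node); and finally copies of the results out of
+/// their physical registers via LowerCallResult.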
+SDValue +HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc &dl = CLI.DL; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; + + bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + + // Check for varargs. + int NumNamedVarArgParams = -1; + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee)) + { + const Function* CalleeFn = NULL; + Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32); + if ((CalleeFn = dyn_cast<Function>(GA->getGlobal()))) + { + // If a function has zero args and is a vararg function, that's + // disallowed so it must be an undeclared function. Do not assume + // varargs if the callee is undefined. + if (CalleeFn->isVarArg() && + CalleeFn->getFunctionType()->getNumParams() != 0) { + NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams(); + } + } + } + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext(), + NumNamedVarArgParams); + + if (NumNamedVarArgParams > 0) + CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg); + else + CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon); + + + if(isTailCall) { + bool StructAttrFlag = + DAG.getMachineFunction().getFunction()->hasStructRetAttr(); + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + isVarArg, IsStructRet, + StructAttrFlag, + Outs, OutVals, Ins, DAG); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i){ + CCValAssign &VA = ArgLocs[i]; + if (VA.isMemLoc()) { + isTailCall = false; + break; + } + } + if (isTailCall) { + DEBUG(dbgs () << "Eligible for Tail Call\n"); + } else { + DEBUG(dbgs () << + "Argument must be passed on stack. Not eligible for Tail Call\n"); + } + } + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); + SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + + SDValue StackPtr = + DAG.getCopyFromReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(), + getPointerTy()); + + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: + // Loc info must be one of Full, SExt, ZExt, or AExt. + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + } + + if (VA.isMemLoc()) { + unsigned LocMemOffset = VA.getLocMemOffset(); + SDValue PtrOff = DAG.getConstant(LocMemOffset, StackPtr.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); + + if (Flags.isByVal()) { + // The argument is a struct passed by value. 
According to LLVM, "Arg"
+        // is a pointer.
+        MemOpChains.push_back(CreateCopyOfByValArgument(Arg, PtrOff, Chain,
+                                                        Flags, DAG, dl));
+      } else {
+        // The argument is not passed by value. "Arg" is a built-in type.
+        // It is not a pointer.
+        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                           MachinePointerInfo(), false, false,
+                                           0));
+      }
+      continue;
+    }
+
+    // Arguments that are passed in a register must be recorded in the
+    // RegsToPass vector.
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    }
+  }
+
+  // Transform all store nodes into one single node because all store
+  // nodes are independent of each other.
+  if (!MemOpChains.empty()) {
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0],
+                        MemOpChains.size());
+  }
+
+  if (!isTailCall)
+    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+                                                        getPointerTy(), true),
+                                 dl);
+
+  // Build a sequence of copy-to-reg nodes chained together with token
+  // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+  SDValue InFlag;
+  if (!isTailCall) {
+    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                               RegsToPass[i].second, InFlag);
+      InFlag = Chain.getValue(1);
+    }
+  }
+
+  // For tail calls, lower the arguments to the 'real' stack slot.
+  if (isTailCall) {
+    // Force all the incoming stack arguments to be loaded from the stack
+    // before any new outgoing arguments are stored to the stack, because the
+    // outgoing stack slots may alias the incoming argument stack slots, and
+    // the alias isn't otherwise explicit. This is slightly more conservative
+    // than necessary, because it means that each store effectively depends
+    // on every argument instead of just those arguments it would clobber.
+    //
+    // Do not flag preceding copytoreg stuff together with the following stuff.
+    InFlag = SDValue();
+    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                               RegsToPass[i].second, InFlag);
+      InFlag = Chain.getValue(1);
+    }
+    InFlag = SDValue();
+  }
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+  if (flag_aligned_memcpy) {
+    const char *MemcpyName =
+      "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
+    Callee =
+      DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
+    flag_aligned_memcpy = false;
+  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+  } else if (ExternalSymbolSDNode *S =
+             dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+  }
+
+  // Returns a chain & a flag for retval copy to use.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + } + + if (InFlag.getNode()) { + Ops.push_back(InFlag); + } + + if (isTailCall) + return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + + Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Create the CALLSEQ_END node. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag, dl); + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + InVals, OutVals, Callee); +} + +static bool getIndexedAddressParts(SDNode *Ptr, EVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if (Ptr->getOpcode() != ISD::ADD) + return false; + + if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) { + isInc = (Ptr->getOpcode() == ISD::ADD); + Base = Ptr->getOperand(0); + Offset = Ptr->getOperand(1); + // Ensure that Offset is a constant. + return (isa<ConstantSDNode>(Offset)); + } + + return false; +} + +// TODO: Put this function along with the other isS* functions in +// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the +// functions defined in HexagonOperands.td. +static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) { + ConstantSDNode *N = cast<ConstantSDNode>(S); + + // immS4 predicate - True if the immediate fits in a 4-bit sign extended. + // field. + int64_t v = (int64_t)N->getSExtValue(); + int64_t m = 0; + if (ShiftAmount > 0) { + m = v % ShiftAmount; + v = v >> ShiftAmount; + } + return (v <= 7) && (v >= -8) && (m == 0); +} + +/// getPostIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if this node can be +/// combined with a load / store to form a post-indexed load / store. +bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const +{ + EVT VT; + SDValue Ptr; + bool isSEXTLoad = false; + + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + VT = LD->getMemoryVT(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + VT = ST->getMemoryVT(); + if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) { + return false; + } + } else { + return false; + } + + bool isInc = false; + bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + // ShiftAmount = number of left-shifted bits in the Hexagon instruction. + int ShiftAmount = VT.getSizeInBits() / 16; + if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) { + AM = isInc ? ISD::POST_INC : ISD::POST_DEC; + return true; + } + + return false; +} + +SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op, + SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + MachineFunction &MF = DAG.getMachineFunction(); + HexagonMachineFunctionInfo *FuncInfo = + MF.getInfo<HexagonMachineFunctionInfo>(); + switch (Node->getOpcode()) { + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the flag operand. 
+ + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + if (FuncInfo->hasClobberLR()) + break; + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegUse: + case InlineAsm::Kind_Imm: + case InlineAsm::Kind_Clobber: + case InlineAsm::Kind_Mem: { + for (; NumVals; --NumVals, ++i) {} + break; + } + case InlineAsm::Kind_RegDefEarlyClobber: { + for (; NumVals; --NumVals, ++i) { + unsigned Reg = + cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + + // Check it to be lr + if (Reg == TM.getRegisterInfo()->getRARegister()) { + FuncInfo->setHasClobberLR(true); + break; + } + } + break; + } + } + } + } + } // Node->getOpcode + return Op; +} + + +// +// Taken from the XCore backend. +// +SDValue HexagonTargetLowering:: +LowerBR_JT(SDValue Op, SelectionDAG &DAG) const +{ + SDValue Chain = Op.getOperand(0); + SDValue Table = Op.getOperand(1); + SDValue Index = Op.getOperand(2); + SDLoc dl(Op); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); + unsigned JTI = JT->getIndex(); + MachineFunction &MF = DAG.getMachineFunction(); + const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); + + // Mark all jump table targets as address taken. + const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables(); + const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs; + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + MBB->setHasAddressTaken(); + // This line is needed to set the hasAddressTaken flag on the BasicBlock + // object. + BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock())); + } + + SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl, + getPointerTy(), TargetJT); + SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index, + DAG.getConstant(2, MVT::i32)); + SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase, + ShiftIndex); + SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress, + MachinePointerInfo(), false, false, false, + 0); + return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget); +} + + +SDValue +HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + SDLoc dl(Op); + + unsigned SPReg = getStackPointerRegisterToSaveRestore(); + + // Get a reference to the stack pointer. + SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32); + + // Subtract the dynamic size from the actual stack size to + // obtain the new stack size. + SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size); + + // + // For Hexagon, the outgoing memory arguments area should be on top of the + // alloca area on the stack i.e., the outgoing memory arguments should be + // at a lower address than the alloca area. Move the alloca area down the + // stack by adding back the space reserved for outgoing arguments to SP + // here. + // + // We do not know what the size of the outgoing args is at this point. + // So, we add a pseudo instruction ADJDYNALLOC that will adjust the + // stack pointer. We patch this instruction with the correct, known + // offset in emitPrologue(). + // + // Use a placeholder immediate (zero) for now. 
This will be patched up
+  // by emitPrologue().
+  SDValue ArgAdjust = DAG.getNode(HexagonISD::ADJDYNALLOC, dl,
+                                  MVT::i32,
+                                  Sub,
+                                  DAG.getConstant(0, MVT::i32));
+
+  // The Sub result contains the new stack start address, so it
+  // must be placed in the stack pointer register.
+  SDValue CopyChain = DAG.getCopyToReg(Chain, dl,
+                                       TM.getRegisterInfo()->getStackRegister(),
+                                       Sub);
+
+  SDValue Ops[2] = { ArgAdjust, CopyChain };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue
+HexagonTargetLowering::LowerFormalArguments(SDValue Chain,
+                                            CallingConv::ID CallConv,
+                                            bool isVarArg,
+                                            const
+                                            SmallVectorImpl<ISD::InputArg> &Ins,
+                                            SDLoc dl, SelectionDAG &DAG,
+                                            SmallVectorImpl<SDValue> &InVals)
+const {
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  HexagonMachineFunctionInfo *FuncInfo =
+    MF.getInfo<HexagonMachineFunctionInfo>();
+
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *DAG.getContext());
+
+  CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+
+  // For LLVM, in the case of returning a struct by value (> 8 bytes),
+  // the first argument is a pointer that points to the location on the
+  // caller's stack where the return value will be stored. For Hexagon,
+  // the location on the caller's stack is passed only when the struct
+  // size is smaller than or equal to 8 bytes. Otherwise, no address is
+  // passed into the callee and the callee returns the result directly
+  // through R0/R1.
+
+  SmallVector<SDValue, 4> MemOps;
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    ISD::ArgFlagsTy Flags = Ins[i].Flags;
+    unsigned ObjSize;
+    unsigned StackLocation;
+    int FI;
+
+    if (   (VA.isRegLoc() && !Flags.isByVal())
+        || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
+      // Arguments passed in registers:
+      // 1. int, long long, and ptr args that get allocated in a register.
+      // 2. Large structs that get a register to put their address in.
+      EVT RegVT = VA.getLocVT();
+      if (RegVT == MVT::i8 || RegVT == MVT::i16 ||
+          RegVT == MVT::i32 || RegVT == MVT::f32) {
+        unsigned VReg =
+          RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+      } else if (RegVT == MVT::i64) {
+        unsigned VReg =
+          RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+      } else {
+        assert (0);
+      }
+    } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
+      assert (0 && "ByValSize must be bigger than 8 bytes");
+    } else {
+      // Sanity check.
+      assert(VA.isMemLoc());
+
+      if (Flags.isByVal()) {
+        // If it's a byval parameter, then we need to compute the
+        // "real" size, not the size of the pointer.
+        ObjSize = Flags.getByValSize();
+      } else {
+        ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
+      }
+
+      StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
+      // Create the frame index object for this incoming parameter...
+      FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); + + if (Flags.isByVal()) { + // If it's a pass-by-value aggregate, then do not dereference the stack + // location. Instead, we should generate a reference to the stack + // location. + InVals.push_back(FIN); + } else { + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + MachinePointerInfo(), false, false, + false, 0)); + } + } + } + + if (!MemOps.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], + MemOps.size()); + + if (isVarArg) { + // This will point to the next argument passed via stack. + int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize, + HEXAGON_LRFP_SIZE + + CCInfo.getNextStackOffset(), + true); + FuncInfo->setVarArgsFrameIndex(FrameIndex); + } + + return Chain; +} + +SDValue +HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + // VASTART stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + MachineFunction &MF = DAG.getMachineFunction(); + HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>(); + SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, + Op.getOperand(1), MachinePointerInfo(SV), false, + false, 0); +} + +SDValue +HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue CC = Op.getOperand(4); + SDValue TrueVal = Op.getOperand(2); + SDValue FalseVal = Op.getOperand(3); + SDLoc dl(Op); + SDNode* OpNode = Op.getNode(); + EVT SVT = OpNode->getValueType(0); + + SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i1, LHS, RHS, CC); + return DAG.getNode(ISD::SELECT, dl, SVT, Cond, TrueVal, FalseVal); +} + +SDValue +HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { + EVT ValTy = Op.getValueType(); + SDLoc dl(Op); + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); + SDValue Res; + if (CP->isMachineConstantPoolEntry()) + Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), ValTy, + CP->getAlignment()); + else + Res = DAG.getTargetConstantPool(CP->getConstVal(), ValTy, + CP->getAlignment()); + return DAG.getNode(HexagonISD::CONST32, dl, ValTy, Res); +} + +SDValue +HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + + EVT VT = Op.getValueType(); + SDLoc dl(Op); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + if (Depth) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = DAG.getConstant(4, MVT::i32); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), + MachinePointerInfo(), false, false, false, 0); + } + + // Return LR, which contains the return address. Mark it an implicit live-in. 
+ unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32)); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); +} + +SDValue +HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { + const HexagonRegisterInfo *TRI = TM.getRegisterInfo(); + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + + EVT VT = Op.getValueType(); + SDLoc dl(Op); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, + TRI->getFrameRegister(), VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, + MachinePointerInfo(), + false, false, false, 0); + return FrameAddr; +} + +SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, + SelectionDAG& DAG) const { + SDLoc dl(Op); + return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0)); +} + + +SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, + SelectionDAG &DAG) const { + SDValue Result; + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); + SDLoc dl(Op); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); + + const HexagonTargetObjectFile &TLOF = + static_cast<const HexagonTargetObjectFile &>(getObjFileLowering()); + if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { + return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result); + } + + return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result); +} + +SDValue +HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + SDValue BA_SD = DAG.getTargetBlockAddress(BA, MVT::i32); + SDLoc dl(Op); + return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), BA_SD); +} + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation +//===----------------------------------------------------------------------===// + +HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine + &targetmachine) + : TargetLowering(targetmachine, new HexagonTargetObjectFile()), + TM(targetmachine) { + + const HexagonRegisterInfo* QRI = TM.getRegisterInfo(); + + // Set up the register classes. 
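+  // (Summary of the calls below, for reference: i32 maps to IntRegs
+  // (r0-r31), i64 to DoubleRegs (the r1:0 .. r31:30 pairs), and i1 to
+  // PredRegs (p0-p3); on V5 targets f32/f64 reuse IntRegs/DoubleRegs.)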
+
+  addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
+  addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
+
+  if (QRI->Subtarget.hasV5TOps()) {
+    addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
+    addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
+  }
+
+  addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
+
+  computeRegisterProperties();
+
+  // Align loop entry.
+  setPrefLoopAlignment(4);
+
+  // Limits for inline expansion of memcpy/memmove.
+  MaxStoresPerMemcpy = 6;
+  MaxStoresPerMemmove = 6;
+
+  //
+  // Library calls for unsupported operations.
+  //
+
+  setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+  setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+
+  setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+  setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+
+  setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+  setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+
+  setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+  setOperationAction(ISD::SDIV, MVT::i32, Expand);
+  setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+  setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+  setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+  setOperationAction(ISD::SDIV, MVT::i64, Expand);
+  setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+  setOperationAction(ISD::SREM, MVT::i64, Expand);
+
+  setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+  setOperationAction(ISD::UDIV, MVT::i32, Expand);
+
+  setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+  setOperationAction(ISD::UDIV, MVT::i64, Expand);
+
+  setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+  setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+  setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
+  setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+  setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+  setOperationAction(ISD::FDIV, MVT::f32, Expand);
+
+  setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+  setOperationAction(ISD::FDIV, MVT::f64, Expand);
+
+  setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+  setOperationAction(ISD::FSIN, MVT::f32, Expand);
+  setOperationAction(ISD::FSIN, MVT::f64, Expand);
+
+  if (QRI->Subtarget.hasV5TOps()) {
+    // Hexagon V5 support.
+ setOperationAction(ISD::FADD, MVT::f32, Legal); + setOperationAction(ISD::FADD, MVT::f64, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); + setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal); + setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal); + setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal); + setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal); + + setCondCodeAction(ISD::SETOGE, MVT::f32, Legal); + setCondCodeAction(ISD::SETOGE, MVT::f64, Legal); + setCondCodeAction(ISD::SETUGE, MVT::f32, Legal); + setCondCodeAction(ISD::SETUGE, MVT::f64, Legal); + + setCondCodeAction(ISD::SETOGT, MVT::f32, Legal); + setCondCodeAction(ISD::SETOGT, MVT::f64, Legal); + setCondCodeAction(ISD::SETUGT, MVT::f32, Legal); + setCondCodeAction(ISD::SETUGT, MVT::f64, Legal); + + setCondCodeAction(ISD::SETOLE, MVT::f32, Legal); + setCondCodeAction(ISD::SETOLE, MVT::f64, Legal); + setCondCodeAction(ISD::SETOLT, MVT::f32, Legal); + setCondCodeAction(ISD::SETOLT, MVT::f64, Legal); + + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + + setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); + + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); + + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal); + + setOperationAction(ISD::FABS, MVT::f32, Legal); + setOperationAction(ISD::FABS, MVT::f64, Expand); + + setOperationAction(ISD::FNEG, MVT::f32, Legal); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + } else { + + // Expand fp<->uint. 
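+    // (Illustrative: with these Expand settings, a pre-V5 conversion such
+    // as `float f = (float)u;` for an unsigned int u is lowered to a call
+    // to __hexagon_floatunsisf via the libcall names registered below.)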
+    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand);
+    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+
+    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+
+    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+
+    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+
+    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+
+    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
+    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
+
+    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+
+    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
+    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
+
+    setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+    setOperationAction(ISD::FADD, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+    setOperationAction(ISD::FADD, MVT::f32, Expand);
+
+    setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+    setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+
+    setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
+    setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+
+    setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
+    setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
+    setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+
+    setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
+    setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+    setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+
+    setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+    setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+    setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+    setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+
+    setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+    setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+    setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+
+    setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+    setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+    setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+
+    setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+    setOperationAction(ISD::FMUL, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+    setOperationAction(ISD::FMUL, MVT::f32, Expand);
+
+    setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+    setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+
+    setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+    setOperationAction(ISD::FSUB, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+    setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+    setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
+    setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+
+    setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+
+    setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+
+    setOperationAction(ISD::FABS, MVT::f32, Expand);
+    setOperationAction(ISD::FABS, MVT::f64, Expand);
+    setOperationAction(ISD::FNEG, MVT::f32, Expand);
+    setOperationAction(ISD::FNEG, MVT::f64, Expand);
+  }
+
+  setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+  setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+  setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+  setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+  setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+  setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
+
+  setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
+  setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
+  setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
+  setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
+
+  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+  // Turn FP extload into load/fextend.
+  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  // Hexagon has an i1 sign extending load.
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+  // Turn FP truncstore into trunc + store.
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+  // Custom legalize GlobalAddress nodes into CONST32.
+  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+  // Truncate action?
+  setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
+
+  // Hexagon doesn't have sext_inreg; replace it with shl/sra.
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+  // Hexagon has no REM or DIVREM operations.
+  setOperationAction(ISD::UREM, MVT::i32, Expand);
+  setOperationAction(ISD::SREM, MVT::i32, Expand);
+  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+  setOperationAction(ISD::SREM, MVT::i64, Expand);
+  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+  // Lower SELECT_CC to SETCC and SELECT.
+  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+
+  if (QRI->Subtarget.hasV5TOps()) {
+
+    // Make SELECT Custom so that we don't go into the infinite
+    // select -> setcc -> select_cc -> select loop.
+    setOperationAction(ISD::SELECT, MVT::f32, Custom);
+    setOperationAction(ISD::SELECT, MVT::f64, Custom);
+
+    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+    setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+  } else {
+
+    // Hexagon has no select or setcc: expand to SELECT_CC.
+    setOperationAction(ISD::SELECT, MVT::f32, Expand);
+    setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+    // This is a workaround documented in DAGCombiner.cpp:2892; we don't
+    // support SELECT_CC on every type.
+    setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+  }
+
+  if (EmitJumpTables) {
+    setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+  } else {
+    setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  }
+  // Increase the jump table cutover to 5 entries (was 4).
+  setMinimumJumpTableEntries(5);
+
+  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
+
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+  setOperationAction(ISD::FSIN , MVT::f64, Expand);
+  setOperationAction(ISD::FCOS , MVT::f64, Expand);
+  setOperationAction(ISD::FREM , MVT::f64, Expand);
+  setOperationAction(ISD::FSIN , MVT::f32, Expand);
+  setOperationAction(ISD::FCOS , MVT::f32, Expand);
+  setOperationAction(ISD::FREM , MVT::f32, Expand);
+  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+
+  // In V4, we have double word add/sub with carry. The problem with
+  // modelling these instructions is that they produce two results: Rdd and
+  // Px. To model the update of Px we would have to use Defs[p0..p3], which
+  // would cause any predicate live range to spill. So we pretend we don't
+  // have these instructions.
+  setOperationAction(ISD::ADDE, MVT::i8, Expand);
+  setOperationAction(ISD::ADDE, MVT::i16, Expand);
+  setOperationAction(ISD::ADDE, MVT::i32, Expand);
+  setOperationAction(ISD::ADDE, MVT::i64, Expand);
+  setOperationAction(ISD::SUBE, MVT::i8, Expand);
+  setOperationAction(ISD::SUBE, MVT::i16, Expand);
+  setOperationAction(ISD::SUBE, MVT::i32, Expand);
+  setOperationAction(ISD::SUBE, MVT::i64, Expand);
+  setOperationAction(ISD::ADDC, MVT::i8, Expand);
+  setOperationAction(ISD::ADDC, MVT::i16, Expand);
+  setOperationAction(ISD::ADDC, MVT::i32, Expand);
+  setOperationAction(ISD::ADDC, MVT::i64, Expand);
+  setOperationAction(ISD::SUBC, MVT::i8, Expand);
+  setOperationAction(ISD::SUBC, MVT::i16, Expand);
+  setOperationAction(ISD::SUBC, MVT::i32, Expand);
+  setOperationAction(ISD::SUBC, MVT::i64, Expand);
+
+  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+  setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+  setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+  setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+  setOperationAction(ISD::ROTL , MVT::i32, Expand);
+  setOperationAction(ISD::ROTR , MVT::i32, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+  setOperationAction(ISD::FPOW , MVT::f64, Expand);
+  setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+  setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+  setOperationAction(ISD::EH_RETURN, MVT::Other,
Custom); + + if (TM.getSubtargetImpl()->isSubtargetV2()) { + setExceptionPointerRegister(Hexagon::R20); + setExceptionSelectorRegister(Hexagon::R21); + } else { + setExceptionPointerRegister(Hexagon::R0); + setExceptionSelectorRegister(Hexagon::R1); + } + + // VASTART needs to be custom lowered to use the VarArgsFrameIndex. + setOperationAction(ISD::VASTART , MVT::Other, Custom); + + // Use the default implementation. + setOperationAction(ISD::VAARG , MVT::Other, Expand); + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); + + + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); + setOperationAction(ISD::INLINEASM , MVT::Other, Custom); + + setMinFunctionAlignment(2); + + // Needed for DYNAMIC_STACKALLOC expansion. + unsigned StackRegister = TM.getRegisterInfo()->getStackRegister(); + setStackPointerRegisterToSaveRestore(StackRegister); + setSchedulingPreference(Sched::VLIW); +} + + +const char* +HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case HexagonISD::CONST32: return "HexagonISD::CONST32"; + case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; + case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real"; + case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC"; + case HexagonISD::CMPICC: return "HexagonISD::CMPICC"; + case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC"; + case HexagonISD::BRICC: return "HexagonISD::BRICC"; + case HexagonISD::BRFCC: return "HexagonISD::BRFCC"; + case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC"; + case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC"; + case HexagonISD::Hi: return "HexagonISD::Hi"; + case HexagonISD::Lo: return "HexagonISD::Lo"; + case HexagonISD::FTOI: return "HexagonISD::FTOI"; + case HexagonISD::ITOF: return "HexagonISD::ITOF"; + case HexagonISD::CALL: return "HexagonISD::CALL"; + case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; + case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; + case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; + case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; + } +} + +bool +HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { + EVT MTy1 = EVT::getEVT(Ty1); + EVT MTy2 = EVT::getEVT(Ty2); + if (!MTy1.isSimple() || !MTy2.isSimple()) { + return false; + } + return ((MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32)); +} + +bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { + if (!VT1.isSimple() || !VT2.isSimple()) { + return false; + } + return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32)); +} + +bool +HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { + // Assuming the caller does not have either a signext or zeroext modifier, and + // only one value is accepted, any reasonable truncation is allowed. + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) + return false; + + // FIXME: in principle up to 64-bit could be made safe, but it would be very + // fragile at the moment: any support for multiple value returns would be + // liable to disallow tail calls involving i64 -> iN truncation in many cases. 
+  return Ty1->getPrimitiveSizeInBits() <= 32;
+}
+
+SDValue
+HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain   = Op.getOperand(0);
+  SDValue Offset  = Op.getOperand(1);
+  SDValue Handler = Op.getOperand(2);
+  SDLoc dl(Op);
+
+  // Mark function as containing a call to EH_RETURN.
+  HexagonMachineFunctionInfo *FuncInfo =
+    DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
+  FuncInfo->setHasEHReturn();
+
+  unsigned OffsetReg = Hexagon::R28;
+
+  SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                                  DAG.getRegister(Hexagon::R30, getPointerTy()),
+                                  DAG.getIntPtrConstant(4));
+  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+                       false, false, 0);
+  Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
+
+  // Not needed; we already use OffsetReg as an explicit input to EH_RETURN.
+  // MF.getRegInfo().addLiveOut(OffsetReg);
+
+  return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
+}
+
+SDValue
+HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+    default: llvm_unreachable("Should not custom lower this!");
+    case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
+    case ISD::EH_RETURN:          return LowerEH_RETURN(Op, DAG);
+    // Frame & Return address.
+    case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
+    case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
+    case ISD::GlobalTLSAddress:
+      llvm_unreachable("TLS not implemented for Hexagon.");
+    case ISD::ATOMIC_FENCE:       return LowerATOMIC_FENCE(Op, DAG);
+    case ISD::GlobalAddress:      return LowerGLOBALADDRESS(Op, DAG);
+    case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
+    case ISD::VASTART:            return LowerVASTART(Op, DAG);
+    case ISD::BR_JT:              return LowerBR_JT(Op, DAG);
+
+    case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+    case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
+    case ISD::SELECT:             return Op;
+    case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+    case ISD::INLINEASM:          return LowerINLINEASM(Op, DAG);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon Scheduler Hooks
+//===----------------------------------------------------------------------===//
+MachineBasicBlock *
+HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                   MachineBasicBlock *BB)
+const {
+  switch (MI->getOpcode()) {
+    case Hexagon::ADJDYNALLOC: {
+      MachineFunction *MF = BB->getParent();
+      HexagonMachineFunctionInfo *FuncInfo =
+        MF->getInfo<HexagonMachineFunctionInfo>();
+      FuncInfo->addAllocaAdjustInst(MI);
+      return BB;
+    }
+    default: llvm_unreachable("Unexpected instr type to insert");
+  } // switch
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass*>
+HexagonTargetLowering::getRegForInlineAsmConstraint(const
+                                                    std::string &Constraint,
+                                                    MVT VT) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    case 'r':   // R0-R31
+      switch (VT.SimpleTy) {
+      default:
+        llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+      case MVT::i32:
+      case MVT::i16:
+      case MVT::i8:
+      case MVT::f32:
+        return std::make_pair(0U, &Hexagon::IntRegsRegClass);
+      case MVT::i64:
+      case MVT::f64:
+        return std::make_pair(0U, &Hexagon::DoubleRegsRegClass);
+      }
+    default:
+      llvm_unreachable("Unknown asm register class");
register class"); + } + } + + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +/// isFPImmLegal - Returns true if the target can instruction select the +/// specified FP immediate natively. If false, the legalizer will +/// materialize the FP immediate as a load from a constant pool. +bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + const HexagonRegisterInfo* QRI = TM.getRegisterInfo(); + return QRI->Subtarget.hasV5TOps(); +} + +/// isLegalAddressingMode - Return true if the addressing mode represented by +/// AM is legal for this target, for a load/store of the specified type. +bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { + // Allows a signed-extended 11-bit immediate field. + if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) { + return false; + } + + // No global is ever allowed as a base. + if (AM.BaseGV) { + return false; + } + + int Scale = AM.Scale; + if (Scale < 0) Scale = -Scale; + switch (Scale) { + case 0: // No scale reg, "r+i", "r", or just "i". + break; + default: // No scaled addressing mode. + return false; + } + return true; +} + +/// isLegalICmpImmediate - Return true if the specified immediate is legal +/// icmp immediate, that is the target has icmp instructions which can compare +/// a register against the immediate without having to materialize the +/// immediate into a register. +bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + return Imm >= -512 && Imm <= 511; +} + +/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// for tail call optimization. Targets which want to do tail call +/// optimization should implement this function. +bool HexagonTargetLowering::IsEligibleForTailCallOptimization( + SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + bool isCalleeStructRet, + bool isCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const { + const Function *CallerF = DAG.getMachineFunction().getFunction(); + CallingConv::ID CallerCC = CallerF->getCallingConv(); + bool CCMatch = CallerCC == CalleeCC; + + // *************************************************************************** + // Look for obvious safe cases to perform tail call optimization that do not + // require ABI changes. + // *************************************************************************** + + // If this is a tail call via a function pointer, then don't do it! + if (!(dyn_cast<GlobalAddressSDNode>(Callee)) + && !(dyn_cast<ExternalSymbolSDNode>(Callee))) { + return false; + } + + // Do not optimize if the calling conventions do not match. + if (!CCMatch) + return false; + + // Do not tail call optimize vararg calls. + if (isVarArg) + return false; + + // Also avoid tail call optimization if either caller or callee uses struct + // return semantics. + if (isCalleeStructRet || isCallerStructRet) + return false; + + // In addition to the cases above, we also disable Tail Call Optimization if + // the calling convention code that at least one outgoing argument needs to + // go on the stack. We cannot check that here because at this point that + // information is not available. 
+ return true; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h new file mode 100644 index 000000000000..73da226a1727 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -0,0 +1,179 @@ +//===-- HexagonISelLowering.h - Hexagon DAG Lowering Interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that Hexagon uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef Hexagon_ISELLOWERING_H +#define Hexagon_ISELLOWERING_H + +#include "Hexagon.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + namespace HexagonISD { + enum { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + CONST32, + CONST32_GP, // For marking data present in GP. + CONST32_Int_Real, + FCONST32, + SETCC, + ADJDYNALLOC, + ARGEXTEND, + + CMPICC, // Compare two GPR operands, set icc. + CMPFCC, // Compare two FP operands, set fcc. + BRICC, // Branch to dest on icc condition + BRFCC, // Branch to dest on fcc condition + SELECT_ICC, // Select between two values using the current ICC flags. + SELECT_FCC, // Select between two values using the current FCC flags. + + Hi, Lo, // Hi/Lo operations, typically on a global address. + + FTOI, // FP to Int within a FP register. + ITOF, // Int to FP within a FP register. + + CALL, // A call instruction. + RET_FLAG, // Return with a flag operand. + BR_JT, // Jump table. + BARRIER, // Memory barrier. + WrapperJT, + WrapperCP, + WrapperCombineII, + WrapperCombineRR, + WrapperCombineRI_V4, + WrapperCombineIR_V4, + WrapperPackhl, + WrapperSplatB, + WrapperSplatH, + WrapperShuffEB, + WrapperShuffEH, + WrapperShuffOB, + WrapperShuffOH, + TC_RETURN, + EH_RETURN + }; + } + + class HexagonTargetLowering : public TargetLowering { + int VarArgsFrameOffset; // Frame offset to start of varargs area. + + bool CanReturnSmallStruct(const Function* CalleeFn, + unsigned& RetSize) const; + + public: + HexagonTargetMachine &TM; + explicit HexagonTargetLowering(HexagonTargetMachine &targetmachine); + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. 
+ bool + IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + bool isCalleeStructRet, + bool isCallerStructRet, + const + SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; + + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; + virtual bool isTruncateFree(EVT VT1, EVT VT2) const; + + virtual bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const; + + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + virtual const char *getTargetNodeName(unsigned Opcode) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const; + + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + const SmallVectorImpl<SDValue> &OutVals, + SDValue Callee) const; + + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc dl, SelectionDAG &DAG) const; + + virtual MachineBasicBlock + *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const; + + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + virtual EVT getSetCCResultType(LLVMContext &C, EVT VT) const { + if (!VT.isVector()) + return MVT::i1; + else + return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); + } + + virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const; + + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const; + + // Intrinsics + virtual SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const; + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. + /// The type may be VoidTy, in which case only return true if the addressing + /// mode is legal for a load/store of any legal type. + /// TODO: Handle pre/postinc as well. 
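+    /// (Illustrative: a base-plus-immediate mode such as r1+#8 is accepted,
+    /// while a scaled mode such as r1+r2<<#2, or any mode using a global as
+    /// the base, is rejected by this hook.)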
+    virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+    virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+    /// isLegalICmpImmediate - Return true if the specified immediate is a
+    /// legal icmp immediate, that is, the target has icmp instructions which
+    /// can compare a register against the immediate without having to
+    /// materialize the immediate into a register.
+    virtual bool isLegalICmpImmediate(int64_t Imm) const;
+  };
+} // end namespace llvm
+
+#endif    // Hexagon_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
new file mode 100644
index 000000000000..d25bfa8b0d85
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -0,0 +1,382 @@
+//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//                         Hexagon Instruction Flags
+//
+//                    *** Must match HexagonBaseInfo.h ***
+//===----------------------------------------------------------------------===//
+
+class IType<bits<5> t> {
+  bits<5> Value = t;
+}
+def TypePSEUDO : IType<0>;
+def TypeALU32  : IType<1>;
+def TypeCR     : IType<2>;
+def TypeJR     : IType<3>;
+def TypeJ      : IType<4>;
+def TypeLD     : IType<5>;
+def TypeST     : IType<6>;
+def TypeSYSTEM : IType<7>;
+def TypeXTYPE  : IType<8>;
+def TypeENDLOOP: IType<31>;
+
+// Maintain list of valid subtargets for each instruction.
+class SubTarget<bits<4> value> {
+  bits<4> Value = value;
+}
+
+def HasV2SubT     : SubTarget<0xf>;
+def HasV2SubTOnly : SubTarget<0x1>;
+def NoV2SubT      : SubTarget<0x0>;
+def HasV3SubT     : SubTarget<0xe>;
+def HasV3SubTOnly : SubTarget<0x2>;
+def NoV3SubT      : SubTarget<0x1>;
+def HasV4SubT     : SubTarget<0xc>;
+def NoV4SubT      : SubTarget<0x3>;
+def HasV5SubT     : SubTarget<0x8>;
+def NoV5SubT      : SubTarget<0x7>;
+
+// Addressing modes for load/store instructions.
+class AddrModeType<bits<3> value> {
+  bits<3> Value = value;
+}
+
+def NoAddrMode     : AddrModeType<0>;  // No addressing mode.
+def Absolute       : AddrModeType<1>;  // Absolute addressing mode.
+def AbsoluteSet    : AddrModeType<2>;  // Absolute set addressing mode.
+def BaseImmOffset  : AddrModeType<3>;  // Indirect with offset.
+def BaseLongOffset : AddrModeType<4>;  // Indirect with long offset.
+def BaseRegOffset  : AddrModeType<5>;  // Indirect with register offset.
+def PostInc        : AddrModeType<6>;  // Post increment addressing mode.
+
+class MemAccessSize<bits<3> value> {
+  bits<3> Value = value;
+}
+
+def NoMemAccess      : MemAccessSize<0>;  // Not a memory access instruction.
+def ByteAccess       : MemAccessSize<1>;  // Byte access instruction (memb).
+def HalfWordAccess   : MemAccessSize<2>;  // Half word access instruction (memh).
+def WordAccess       : MemAccessSize<3>;  // Word access instruction (memw).
+def DoubleWordAccess : MemAccessSize<4>;  // Double word access instruction (memd).
+
+
+//===----------------------------------------------------------------------===//
+//                         Instruction Class Declaration
+//===----------------------------------------------------------------------===//
+
+class OpcodeHexagon {
+  field bits<32> Inst = ?; // Default to an invalid insn.
+  bits<4> IClass = 0; // ICLASS
+  bits<2> IParse = 0; // Parse bits.
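+  // (For example, a subclass that sets IClass = 0xa gets 0b1010 placed into
+  // Inst{31-28} by the `let` below, while IParse supplies the packet-parse
+  // bits Inst{15-14}; illustrative note, not from the original file.)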
+ + let Inst{31-28} = IClass; + let Inst{15-14} = IParse; + + bits<1> zero = 0; +} + +class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr, InstrItinClass itin, IType type> + : Instruction, OpcodeHexagon { + let Namespace = "Hexagon"; + + dag OutOperandList = outs; + dag InOperandList = ins; + let AsmString = asmstr; + let Pattern = pattern; + let Constraints = cstr; + let Itinerary = itin; + let Size = 4; + + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** + + // Instruction type according to the ISA. + IType Type = type; + let TSFlags{4-0} = Type.Value; + + // Solo instructions, i.e., those that cannot be in a packet with others. + bits<1> isSolo = 0; + let TSFlags{5} = isSolo; + + // Predicated instructions. + bits<1> isPredicated = 0; + let TSFlags{6} = isPredicated; + bits<1> isPredicatedFalse = 0; + let TSFlags{7} = isPredicatedFalse; + bits<1> isPredicatedNew = 0; + let TSFlags{8} = isPredicatedNew; + + // New-value insn helper fields. + bits<1> isNewValue = 0; + let TSFlags{9} = isNewValue; // New-value consumer insn. + bits<1> hasNewValue = 0; + let TSFlags{10} = hasNewValue; // New-value producer insn. + bits<3> opNewValue = 0; + let TSFlags{13-11} = opNewValue; // New-value produced operand. + bits<2> opNewBits = 0; + let TSFlags{15-14} = opNewBits; // New-value opcode bits location: 0, 8, 16. + bits<1> isNVStorable = 0; + let TSFlags{16} = isNVStorable; // Store that can become new-value store. + bits<1> isNVStore = 0; + let TSFlags{17} = isNVStore; // New-value store insn. + + // Immediate extender helper fields. + bits<1> isExtendable = 0; + let TSFlags{18} = isExtendable; // Insn may be extended. + bits<1> isExtended = 0; + let TSFlags{19} = isExtended; // Insn must be extended. + bits<3> opExtendable = 0; + let TSFlags{22-20} = opExtendable; // Which operand may be extended. + bits<1> isExtentSigned = 0; + let TSFlags{23} = isExtentSigned; // Signed or unsigned range. + bits<5> opExtentBits = 0; + let TSFlags{28-24} = opExtentBits; //Number of bits of range before extending. + + // If an instruction is valid on a subtarget (v2-v5), set the corresponding + // bit from validSubTargets. v2 is the least significant bit. + // By default, instruction is valid on all subtargets. + SubTarget validSubTargets = HasV2SubT; + let TSFlags{32-29} = validSubTargets.Value; + + // Addressing mode for load/store instructions. + AddrModeType addrMode = NoAddrMode; + let TSFlags{35-33} = addrMode.Value; + + // Memory access size for mem access instructions (load/store) + MemAccessSize accessSize = NoMemAccess; + let TSFlags{38-36} = accessSize.Value; + + // Fields used for relation models. + string BaseOpcode = ""; + string CextOpcode = ""; + string PredSense = ""; + string PNewValue = ""; + string NValueST = ""; // Set to "true" for new-value stores. + string InputType = ""; // Input is "imm" or "reg" type. + string isMEMri = "false"; // Set to "true" for load/store with MEMri operand. + string isFloat = "false"; // Set to "true" for the floating-point load/store. 
+  string isBrTaken = ""; // Set to "true"/"false" for jump instructions.
+
+  let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"),
+                      "");
+  let PNewValue = !if(isPredicatedNew, "new", "");
+  let NValueST = !if(isNVStore, "true", "false");
+
+  // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+}
+
+//===----------------------------------------------------------------------===//
+//                         Instruction Classes Definitions
+//===----------------------------------------------------------------------===//
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+             string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD>;
+
+let mayLoad = 1 in
+class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "">
+  : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                  string cstr = "">
+  : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                 string cstr = "">
+  : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1 in
+class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "">
+  : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+let mayStore = 1 in
+class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+             string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST>;
+
+class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "">
+  : STInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayStore = 1 in
+class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, ST0, TypeST>;
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                 string cstr = "">
+  : STInst<outs, ins, asmstr, pattern, cstr>;
+
+// SYSTEM Instruction Class in V4 can take SLOT0 only.
+// In V2/V3 we used ST for this, but in V4 ST can take SLOT0 or SLOT1.
+class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, SYS, TypeSYSTEM>;
+
+// ALU32 Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU32, TypeALU32>;
+
+// ALU64 Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
+class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE>;
+
+class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                string cstr = "">
+  : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
+
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+            string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE>;
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                string cstr = "">
+  : MInst<outs, ins, asmstr, pattern, cstr>;
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+            string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE>;
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                string cstr = "">
+  : SInst<outs, ins, asmstr, pattern, cstr>;
+
+// J Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+            string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, J, TypeJ>;
+
+// JR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+             string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, JR, TypeJR>;
+
+// CR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+             string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, CR, TypeCR>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, ENDLOOP, TypeENDLOOP>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+             string cstr = "">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr="">
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>;
+
+//===----------------------------------------------------------------------===//
+//                         Instruction Classes Definitions
+//===----------------------------------------------------------------------===//
+
+
+//
+// ALU32 patterns.
+//
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern,
+               string cstr = "">
+  : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
+
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern,
+               string cstr = "">
+  : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
+
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern,
+               string cstr = "">
+  : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
+
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern,
+               string cstr = "">
+  : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
+
+//
+// ALU64 patterns.
+//
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern,
+               string cstr = "">
+  : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
+
+class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern,
+               string cstr = "">
+  : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
+
+// Post increment ST Instruction.
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+               string cstr = "">
+  : STInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayStore = 1 in
+class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                string cstr = "">
+  : STInst<outs, ins, asmstr, pattern, cstr>;
+
+// Post increment LD Instruction.
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+               string cstr = "">
+  : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1 in
+class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+                string cstr = "">
+  : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+//===----------------------------------------------------------------------===//
+//                         V4 Instruction Format Definitions
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV4.td"
+
+//===----------------------------------------------------------------------===//
+//                         V4 Instruction Format Definitions
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
new file mode 100644
index 000000000000..9fda0da91612
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -0,0 +1,66 @@
+//==- HexagonInstrFormatsV4.td - Hexagon V4 Instruction Formats -*- tablegen -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+//                         Hexagon Instruction Flags
+//
+//                    *** Must match BaseInfo.h ***
+//----------------------------------------------------------------------------//
+
+def TypeMEMOP  : IType<9>;
+def TypeNV     : IType<10>;
+def TypePREFIX : IType<30>;
+
+//----------------------------------------------------------------------------//
+//                         Instruction Classes Definitions
+//----------------------------------------------------------------------------//
+
+//
+// NV type instructions.
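+// (Background note, illustrative: a new-value (NV) instruction reads a
+// register produced by another instruction in the same packet, e.g.
+//   { r3 = add(r1, r2)
+//     memw(r4+#0) = r3.new }
+// The classes below describe these V4 forms.)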
+// +class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV>; + +class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : NVInst<outs, ins, asmstr, pattern, cstr>; + +// Definition of Post increment new value store. +class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : NVInst<outs, ins, asmstr, pattern, cstr>; + +// Post increment ST Instruction. +let mayStore = 1 in +class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : NVInst<outs, ins, asmstr, pattern, cstr>; + +// New-value conditional branch. +class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : NVInst<outs, ins, asmstr, pattern, cstr>; + +let mayLoad = 1, mayStore = 1 in +class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, MEM_V4, TypeMEMOP>; + +class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : MEMInst<outs, ins, asmstr, pattern, cstr>; + +let isCodeGenOnly = 1 in +class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []> + : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp new file mode 100644 index 000000000000..6b97609415a3 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -0,0 +1,1856 @@ +//===-- HexagonInstrInfo.cpp - Hexagon Instruction Information ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "HexagonInstrInfo.h" +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#define GET_INSTRINFO_CTOR_DTOR +#define GET_INSTRMAP_INFO +#include "HexagonGenInstrInfo.inc" +#include "HexagonGenDFAPacketizer.inc" + +using namespace llvm; + +/// +/// Constants for Hexagon instructions. 
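+/// (Reading guide, illustrative: the *_OFFSET_* values bound the signed
+/// byte offset of base+immediate accesses, e.g. memw(r1+#-4096) through
+/// memw(r1+#4095); the *_AUTOINC_* values bound post-increment strides,
+/// e.g. memw(r1++#-32) through memw(r1++#28).)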
+///
+const int Hexagon_MEMW_OFFSET_MAX = 4095;
+const int Hexagon_MEMW_OFFSET_MIN = -4096;
+const int Hexagon_MEMD_OFFSET_MAX = 8191;
+const int Hexagon_MEMD_OFFSET_MIN = -8192;
+const int Hexagon_MEMH_OFFSET_MAX = 2047;
+const int Hexagon_MEMH_OFFSET_MIN = -2048;
+const int Hexagon_MEMB_OFFSET_MAX = 1023;
+const int Hexagon_MEMB_OFFSET_MIN = -1024;
+const int Hexagon_ADDI_OFFSET_MAX = 32767;
+const int Hexagon_ADDI_OFFSET_MIN = -32768;
+const int Hexagon_MEMD_AUTOINC_MAX = 56;
+const int Hexagon_MEMD_AUTOINC_MIN = -64;
+const int Hexagon_MEMW_AUTOINC_MAX = 28;
+const int Hexagon_MEMW_AUTOINC_MIN = -32;
+const int Hexagon_MEMH_AUTOINC_MAX = 14;
+const int Hexagon_MEMH_AUTOINC_MIN = -16;
+const int Hexagon_MEMB_AUTOINC_MAX = 7;
+const int Hexagon_MEMB_AUTOINC_MIN = -8;
+
+// Pin the vtable to this file.
+void HexagonInstrInfo::anchor() {}
+
+HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
+  : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+    RI(ST), Subtarget(ST) {
+}
+
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                               int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case Hexagon::LDriw:
+  case Hexagon::LDrid:
+  case Hexagon::LDrih:
+  case Hexagon::LDrib:
+  case Hexagon::LDriub:
+    if (MI->getOperand(2).isFI() &&
+        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+      FrameIndex = MI->getOperand(2).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stored stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                              int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case Hexagon::STriw:
+  case Hexagon::STrid:
+  case Hexagon::STrih:
+  case Hexagon::STrib:
+    if (MI->getOperand(2).isFI() &&
+        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+      FrameIndex = MI->getOperand(0).getIndex();
+      return MI->getOperand(2).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+
+unsigned
+HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                               MachineBasicBlock *FBB,
+                               const SmallVectorImpl<MachineOperand> &Cond,
+                               DebugLoc DL) const {
+
+  int BOpc   = Hexagon::JMP;
+  int BccOpc = Hexagon::JMP_t;
+
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  int regPos = 0;
+  // Check if ReverseBranchCondition has asked to reverse this branch.
+  // If we want to reverse the branch an odd number of times, we want
+  // JMP_f.
+  if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+    BccOpc = Hexagon::JMP_f;
+    regPos = 1;
+  }
+
+  if (FBB == 0) {
+    if (Cond.empty()) {
+      // Due to a bug in TailMerging/CFG Optimization, we need to add
+      // special case handling of a predicated jump followed by an
+      // unconditional jump.
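+      // (Illustrative: if this block currently ends in
+      //    if (!p0) jump BB2     ; BB2 is the next block in layout
+      // and we are asked to insert `jump BB1`, the code below reverses the
+      // condition and emits the single branch
+      //    if (p0) jump BB1
+      // so that BB2 is reached by fall-through.)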
If not, Tail Merging and CFG Optimization go
+      // into an infinite loop.
+      MachineBasicBlock *NewTBB, *NewFBB;
+      SmallVector<MachineOperand, 4> Cond;
+      MachineInstr *Term = MBB.getFirstTerminator();
+      if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond,
+                                               false)) {
+        MachineBasicBlock *NextBB =
+          llvm::next(MachineFunction::iterator(&MBB));
+        if (NewTBB == NextBB) {
+          ReverseBranchCondition(Cond);
+          RemoveBranch(MBB);
+          return InsertBranch(MBB, TBB, 0, Cond, DL);
+        }
+      }
+      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+    } else {
+      BuildMI(&MBB, DL,
+              get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+    }
+    return 1;
+  }
+
+  BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+
+  return 2;
+}
+
+
+bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                     MachineBasicBlock *&TBB,
+                                     MachineBasicBlock *&FBB,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     bool AllowModify) const {
+  TBB = NULL;
+  FBB = NULL;
+
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::instr_iterator I = MBB.instr_end();
+  if (I == MBB.instr_begin())
+    return false;
+
+  // A basic block may look like this:
+  //
+  //  [ insn
+  //     EH_LABEL
+  //     insn
+  //     insn
+  //     insn
+  //     EH_LABEL
+  //     insn ]
+  //
+  // It has two successors but no terminator; we don't know how to handle it.
+  do {
+    --I;
+    if (I->isEHLabel())
+      return true;
+  } while (I != MBB.instr_begin());
+
+  I = MBB.instr_end();
+  --I;
+
+  while (I->isDebugValue()) {
+    if (I == MBB.instr_begin())
+      return false;
+    --I;
+  }
+
+  // Delete the JMP if it's equivalent to a fall-through.
+  if (AllowModify && I->getOpcode() == Hexagon::JMP &&
+      MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+    DEBUG(dbgs() << "\nErasing the jump to successor block\n");
+    I->eraseFromParent();
+    I = MBB.instr_end();
+    if (I == MBB.instr_begin())
+      return false;
+    --I;
+  }
+  if (!isUnpredicatedTerminator(I))
+    return false;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = I;
+  MachineInstr *SecondLastInst = NULL;
+  // Find one more terminator if present.
+  do {
+    if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(I)) {
+      if (!SecondLastInst)
+        SecondLastInst = I;
+      else
+        // This is a third branch.
+        return true;
+    }
+    if (I == MBB.instr_begin())
+      break;
+    --I;
+  } while (I);
+
+  int LastOpcode = LastInst->getOpcode();
+
+  bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode);
+  bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode);
+
+  // If there is only one terminator instruction, process it.
+  if (LastInst && !SecondLastInst) {
+    if (LastOpcode == Hexagon::JMP) {
+      TBB = LastInst->getOperand(0).getMBB();
+      return false;
+    }
+    if (LastOpcode == Hexagon::ENDLOOP0) {
+      TBB = LastInst->getOperand(0).getMBB();
+      Cond.push_back(LastInst->getOperand(0));
+      return false;
+    }
+    if (LastOpcodeHasJMP_c) {
+      TBB = LastInst->getOperand(1).getMBB();
+      if (LastOpcodeHasNot) {
+        Cond.push_back(MachineOperand::CreateImm(0));
+      }
+      Cond.push_back(LastInst->getOperand(0));
+      return false;
+    }
+    // Otherwise, we don't know what this is.
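+    // Returning true tells the caller that the branching in this block
+    // could not be analyzed, so it must be left untouched.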
+    return true;
+  }
+
+  int SecLastOpcode = SecondLastInst->getOpcode();
+
+  bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
+  bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode);
+  if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::JMP)) {
+    TBB = SecondLastInst->getOperand(1).getMBB();
+    if (SecLastOpcodeHasNot)
+      Cond.push_back(MachineOperand::CreateImm(0));
+    Cond.push_back(SecondLastInst->getOperand(0));
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+  // If the block ends with two Hexagon::JMPs, handle it. The second one is
+  // not executed, so remove it.
+  if (SecLastOpcode == Hexagon::JMP && LastOpcode == Hexagon::JMP) {
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    I = LastInst;
+    if (AllowModify)
+      I->eraseFromParent();
+    return false;
+  }
+
+  // If the block ends with an ENDLOOP followed by a JMP, handle it.
+  if (SecLastOpcode == Hexagon::ENDLOOP0 &&
+      LastOpcode == Hexagon::JMP) {
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    Cond.push_back(SecondLastInst->getOperand(0));
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+  // Otherwise, we can't handle this.
+  return true;
+}
+
+
+unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  int BOpc = Hexagon::JMP;
+  int BccOpc = Hexagon::JMP_t;
+  int BccOpcNot = Hexagon::JMP_f;
+
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin()) return 0;
+  --I;
+  if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc &&
+      I->getOpcode() != BccOpcNot)
+    return 0;
+
+  // Remove the branch.
+  I->eraseFromParent();
+
+  I = MBB.end();
+
+  if (I == MBB.begin()) return 1;
+  --I;
+  if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot)
+    return 1;
+
+  // Remove the branch.
+  I->eraseFromParent();
+  return 2;
+}
+
+
+/// \brief For a comparison instruction, return the source registers in
+/// \p SrcReg and \p SrcReg2 if it has two register operands, and the value
+/// it compares against in \p Value. Return true if the comparison
+/// instruction can be analyzed.
+bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
+                                      unsigned &SrcReg, unsigned &SrcReg2,
+                                      int &Mask, int &Value) const {
+  unsigned Opc = MI->getOpcode();
+
+  // Set mask and the first source register.
+  switch (Opc) {
+  case Hexagon::CMPEHexagon4rr:
+  case Hexagon::CMPEQri:
+  case Hexagon::CMPEQrr:
+  case Hexagon::CMPGT64rr:
+  case Hexagon::CMPGTU64rr:
+  case Hexagon::CMPGTUri:
+  case Hexagon::CMPGTUrr:
+  case Hexagon::CMPGTri:
+  case Hexagon::CMPGTrr:
+    SrcReg = MI->getOperand(1).getReg();
+    Mask = ~0;
+    break;
+  case Hexagon::CMPbEQri_V4:
+  case Hexagon::CMPbEQrr_sbsb_V4:
+  case Hexagon::CMPbEQrr_ubub_V4:
+  case Hexagon::CMPbGTUri_V4:
+  case Hexagon::CMPbGTUrr_V4:
+  case Hexagon::CMPbGTrr_V4:
+    SrcReg = MI->getOperand(1).getReg();
+    Mask = 0xFF;
+    break;
+  case Hexagon::CMPhEQri_V4:
+  case Hexagon::CMPhEQrr_shl_V4:
+  case Hexagon::CMPhEQrr_xor_V4:
+  case Hexagon::CMPhGTUri_V4:
+  case Hexagon::CMPhGTUrr_V4:
+  case Hexagon::CMPhGTrr_shl_V4:
+    SrcReg = MI->getOperand(1).getReg();
+    Mask = 0xFFFF;
+    break;
+  }
+
+  // Set the value/second source register.
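+  // Register-register compares report both source registers below; the
+  // register-immediate forms instead report SrcReg2 = 0 and put the
+  // immediate in Value.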
+ switch (Opc) { + case Hexagon::CMPEHexagon4rr: + case Hexagon::CMPEQrr: + case Hexagon::CMPGT64rr: + case Hexagon::CMPGTU64rr: + case Hexagon::CMPGTUrr: + case Hexagon::CMPGTrr: + case Hexagon::CMPbEQrr_sbsb_V4: + case Hexagon::CMPbEQrr_ubub_V4: + case Hexagon::CMPbGTUrr_V4: + case Hexagon::CMPbGTrr_V4: + case Hexagon::CMPhEQrr_shl_V4: + case Hexagon::CMPhEQrr_xor_V4: + case Hexagon::CMPhGTUrr_V4: + case Hexagon::CMPhGTrr_shl_V4: + SrcReg2 = MI->getOperand(2).getReg(); + return true; + + case Hexagon::CMPEQri: + case Hexagon::CMPGTUri: + case Hexagon::CMPGTri: + case Hexagon::CMPbEQri_V4: + case Hexagon::CMPbGTUri_V4: + case Hexagon::CMPhEQri_V4: + case Hexagon::CMPhGTUri_V4: + SrcReg2 = 0; + Value = MI->getOperand(2).getImm(); + return true; + } + + return false; +} + + +void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFR), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFR64), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) { + // Map Pd = Ps to Pd = or(Ps, Ps). + BuildMI(MBB, I, DL, get(Hexagon::OR_pp), + DestReg).addReg(SrcReg).addReg(SrcReg); + return; + } + if (Hexagon::DoubleRegsRegClass.contains(DestReg) && + Hexagon::IntRegsRegClass.contains(SrcReg)) { + // We can have an overlap between single and double reg: r1:0 = r0. + if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) { + // r1:0 = r0 + BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg, + Hexagon::subreg_hireg))).addImm(0); + } else { + // r1:0 = r1 or no overlap. + BuildMI(MBB, I, DL, get(Hexagon::TFR), (RI.getSubReg(DestReg, + Hexagon::subreg_loreg))).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg, + Hexagon::subreg_hireg))).addImm(0); + } + return; + } + if (Hexagon::CRRegsRegClass.contains(DestReg) && + Hexagon::IntRegsRegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::PredRegsRegClass.contains(SrcReg) && + Hexagon::IntRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFR_RsPd), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + Hexagon::PredRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFR_PdRs), DestReg). 
+      addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+
+  llvm_unreachable("Unimplemented");
+}
+
+
+void HexagonInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                    unsigned SrcReg, bool isKill, int FI,
+                    const TargetRegisterClass *RC,
+                    const TargetRegisterInfo *TRI) const {
+
+  DebugLoc DL = MBB.findDebugLoc(I);
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  unsigned Align = MFI.getObjectAlignment(FI);
+
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(
+                  MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                  MachineMemOperand::MOStore,
+                  MFI.getObjectSize(FI),
+                  Align);
+
+  if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
+    BuildMI(MBB, I, DL, get(Hexagon::STriw))
+        .addFrameIndex(FI).addImm(0)
+        .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+  } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) {
+    BuildMI(MBB, I, DL, get(Hexagon::STrid))
+        .addFrameIndex(FI).addImm(0)
+        .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+  } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) {
+    BuildMI(MBB, I, DL, get(Hexagon::STriw_pred))
+        .addFrameIndex(FI).addImm(0)
+        .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+  } else {
+    llvm_unreachable("Unimplemented");
+  }
+}
+
+
+void HexagonInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+                                      bool isKill,
+                                      SmallVectorImpl<MachineOperand> &Addr,
+                                      const TargetRegisterClass *RC,
+                                      SmallVectorImpl<MachineInstr*> &NewMIs)
+                                      const {
+  llvm_unreachable("Unimplemented");
+}
+
+
+void HexagonInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                     unsigned DestReg, int FI,
+                     const TargetRegisterClass *RC,
+                     const TargetRegisterInfo *TRI) const {
+  DebugLoc DL = MBB.findDebugLoc(I);
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  unsigned Align = MFI.getObjectAlignment(FI);
+
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(
+                  MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                  MachineMemOperand::MOLoad,
+                  MFI.getObjectSize(FI),
+                  Align);
+  if (RC == &Hexagon::IntRegsRegClass) {
+    BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg)
+        .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else if (RC == &Hexagon::DoubleRegsRegClass) {
+    BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg)
+        .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else if (RC == &Hexagon::PredRegsRegClass) {
+    BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
+        .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else {
+    llvm_unreachable("Can't load this register from a stack slot");
+  }
+}
+
+
+void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                                       SmallVectorImpl<MachineOperand> &Addr,
+                                       const TargetRegisterClass *RC,
+                                       SmallVectorImpl<MachineInstr*> &NewMIs)
+                                       const {
+  llvm_unreachable("Unimplemented");
+}
+
+
+MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+                                                      MachineInstr *MI,
+                                          const SmallVectorImpl<unsigned> &Ops,
+                                                      int FI) const {
+  // Hexagon_TODO: Implement.
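+  // Returning null tells the generic code that folding a memory operand
+  // into MI is not supported, so spill code keeps separate load/store
+  // instructions.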
+  return 0;
+}
+
+unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
+
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  const TargetRegisterClass *TRC;
+  if (VT == MVT::i1) {
+    TRC = &Hexagon::PredRegsRegClass;
+  } else if (VT == MVT::i32 || VT == MVT::f32) {
+    TRC = &Hexagon::IntRegsRegClass;
+  } else if (VT == MVT::i64 || VT == MVT::f64) {
+    TRC = &Hexagon::DoubleRegsRegClass;
+  } else {
+    llvm_unreachable("Cannot handle this register class");
+  }
+
+  unsigned NewReg = RegInfo.createVirtualRegister(TRC);
+  return NewReg;
+}
+
+bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
+  // Constant extenders are allowed only for V4 and above.
+  if (!Subtarget.hasV4TOps())
+    return false;
+
+  const MCInstrDesc &MID = MI->getDesc();
+  const uint64_t F = MID.TSFlags;
+  if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
+    return true;
+
+  // TODO: This is largely obsolete now. Will need to be removed
+  // in consecutive patches.
+  switch (MI->getOpcode()) {
+  // TFR_FI remains a special case.
+  case Hexagon::TFR_FI:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// This returns true in two cases:
+// - The opcode itself indicates that this is an extended instruction.
+// - One of the MOs has been marked with the HMOTF_ConstExtended flag.
+bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+  // First check if this is a permanently extended opcode.
+  const uint64_t F = MI->getDesc().TSFlags;
+  if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
+    return true;
+  // Use MO operand flags to determine if one of MI's operands
+  // has the HMOTF_ConstExtended flag set.
+  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+       E = MI->operands_end(); I != E; ++I) {
+    if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+      return true;
+  }
+  return false;
+}
+
+bool HexagonInstrInfo::isBranch(const MachineInstr *MI) const {
+  return MI->getDesc().isBranch();
+}
+
+bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
+  return isNewValueJump(MI) || isNewValueStore(MI);
+}
+
+bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
+  return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
+}
+
+bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
+  bool isPred = MI->getDesc().isPredicable();
+
+  if (!isPred)
+    return false;
+
+  const int Opc = MI->getOpcode();
+
+  switch (Opc) {
+  case Hexagon::TFRI:
+    return isInt<12>(MI->getOperand(1).getImm());
+
+  case Hexagon::STrid:
+  case Hexagon::STrid_indexed:
+    return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
+
+  case Hexagon::STriw:
+  case Hexagon::STriw_indexed:
+  case Hexagon::STriw_nv_V4:
+    return isShiftedUInt<6,2>(MI->getOperand(1).getImm());
+
+  case Hexagon::STrih:
+  case Hexagon::STrih_indexed:
+  case Hexagon::STrih_nv_V4:
+    return isShiftedUInt<6,1>(MI->getOperand(1).getImm());
+
+  case Hexagon::STrib:
+  case Hexagon::STrib_indexed:
+  case Hexagon::STrib_nv_V4:
+    return isUInt<6>(MI->getOperand(1).getImm());
+
+  case Hexagon::LDrid:
+  case Hexagon::LDrid_indexed:
+    return isShiftedUInt<6,3>(MI->getOperand(2).getImm());
+
+  case Hexagon::LDriw:
+  case Hexagon::LDriw_indexed:
+    return isShiftedUInt<6,2>(MI->getOperand(2).getImm());
+
+  case Hexagon::LDrih:
+  case Hexagon::LDriuh:
+  case Hexagon::LDrih_indexed:
+  case Hexagon::LDriuh_indexed:
+    return isShiftedUInt<6,1>(MI->getOperand(2).getImm());
+
+  case Hexagon::LDrib:
+  case Hexagon::LDriub:
+  case Hexagon::LDrib_indexed:
+  case Hexagon::LDriub_indexed:
+    return isUInt<6>(MI->getOperand(2).getImm());
+
+  case Hexagon::POST_LDrid:
+    return isShiftedInt<4,3>(MI->getOperand(3).getImm());
+
+  case Hexagon::POST_LDriw:
+    return isShiftedInt<4,2>(MI->getOperand(3).getImm());
+
+  case Hexagon::POST_LDrih:
+  case Hexagon::POST_LDriuh:
+    return isShiftedInt<4,1>(MI->getOperand(3).getImm());
+
+  case Hexagon::POST_LDrib:
+  case Hexagon::POST_LDriub:
+    return isInt<4>(MI->getOperand(3).getImm());
+
+  case Hexagon::STrib_imm_V4:
+  case Hexagon::STrih_imm_V4:
+  case Hexagon::STriw_imm_V4:
+    return (isUInt<6>(MI->getOperand(1).getImm()) &&
+            isInt<6>(MI->getOperand(2).getImm()));
+
+  case Hexagon::ADD_ri:
+    return isInt<8>(MI->getOperand(2).getImm());
+
+  case Hexagon::ASLH:
+  case Hexagon::ASRH:
+  case Hexagon::SXTB:
+  case Hexagon::SXTH:
+  case Hexagon::ZXTB:
+  case Hexagon::ZXTH:
+    return Subtarget.hasV4TOps();
+  }
+
+  return true;
+}
+
+// This function performs the following inversions:
+//
+//   cPt    ---> cNotPt
+//   cNotPt ---> cPt
+//
+unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
+  int InvPredOpcode;
+  InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
+                                        : Hexagon::getTruePredOpcode(Opc);
+  if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate.
+    return InvPredOpcode;
+
+  switch (Opc) {
+  default: llvm_unreachable("Unexpected predicated instruction");
+  case Hexagon::COMBINE_rr_cPt:
+    return Hexagon::COMBINE_rr_cNotPt;
+  case Hexagon::COMBINE_rr_cNotPt:
+    return Hexagon::COMBINE_rr_cPt;
+
+  // Dealloc_return.
+  case Hexagon::DEALLOC_RET_cPt_V4:
+    return Hexagon::DEALLOC_RET_cNotPt_V4;
+  case Hexagon::DEALLOC_RET_cNotPt_V4:
+    return Hexagon::DEALLOC_RET_cPt_V4;
+  }
+}
+
+// New-value store instructions.
+bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+
+  return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
+}
+
+bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
+  const uint64_t F = get(Opcode).TSFlags;
+
+  return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
+}
+
+int HexagonInstrInfo::
+getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
+  enum Hexagon::PredSense inPredSense;
+  inPredSense = invertPredicate ? Hexagon::PredSense_false :
+                                  Hexagon::PredSense_true;
+  int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense);
+  if (CondOpcode >= 0) // Valid conditional opcode/instruction.
+    return CondOpcode;
+
+  // This switch case will be removed once all the instructions have been
+  // modified to use relation maps.
+  switch (Opc) {
+  case Hexagon::TFRI_f:
+    return !invertPredicate ? Hexagon::TFRI_cPt_f :
+                              Hexagon::TFRI_cNotPt_f;
+  case Hexagon::COMBINE_rr:
+    return !invertPredicate ? Hexagon::COMBINE_rr_cPt :
+                              Hexagon::COMBINE_rr_cNotPt;
+
+  // Word.
+  case Hexagon::STriw_f:
+    return !invertPredicate ? Hexagon::STriw_cPt :
+                              Hexagon::STriw_cNotPt;
+  case Hexagon::STriw_indexed_f:
+    return !invertPredicate ? Hexagon::STriw_indexed_cPt :
+                              Hexagon::STriw_indexed_cNotPt;
+
+  // DEALLOC_RETURN.
+  case Hexagon::DEALLOC_RET_V4:
+    return !invertPredicate ?
Hexagon::DEALLOC_RET_cPt_V4 : + Hexagon::DEALLOC_RET_cNotPt_V4; + } + llvm_unreachable("Unexpected predicable instruction"); +} + + +bool HexagonInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Cond) const { + int Opc = MI->getOpcode(); + assert (isPredicable(MI) && "Expected predicable instruction"); + bool invertJump = (!Cond.empty() && Cond[0].isImm() && + (Cond[0].getImm() == 0)); + + // This will change MI's opcode to its predicate version. + // However, its operand list is still the old one, i.e. the + // non-predicate one. + MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump))); + + int oper = -1; + unsigned int GAIdx = 0; + + // Indicates whether the current MI has a GlobalAddress operand + bool hasGAOpnd = false; + std::vector<MachineOperand> tmpOpnds; + + // Indicates whether we need to shift operands to right. + bool needShift = true; + + // The predicate is ALWAYS the FIRST input operand !!! + if (MI->getNumOperands() == 0) { + // The non-predicate version of MI does not take any operands, + // i.e. no outs and no ins. In this condition, the predicate + // operand will be directly placed at Operands[0]. No operand + // shift is needed. + // Example: BARRIER + needShift = false; + oper = -1; + } + else if ( MI->getOperand(MI->getNumOperands()-1).isReg() + && MI->getOperand(MI->getNumOperands()-1).isDef() + && !MI->getOperand(MI->getNumOperands()-1).isImplicit()) { + // The non-predicate version of MI does not have any input operands. + // In this condition, we extend the length of Operands[] by one and + // copy the original last operand to the newly allocated slot. + // At this moment, it is just a place holder. Later, we will put + // predicate operand directly into it. No operand shift is needed. + // Example: r0=BARRIER (this is a faked insn used here for illustration) + MI->addOperand(MI->getOperand(MI->getNumOperands()-1)); + needShift = false; + oper = MI->getNumOperands() - 2; + } + else { + // We need to right shift all input operands by one. Duplicate the + // last operand into the newly allocated slot. + MI->addOperand(MI->getOperand(MI->getNumOperands()-1)); + } + + if (needShift) + { + // Operands[ MI->getNumOperands() - 2 ] has been copied into + // Operands[ MI->getNumOperands() - 1 ], so we start from + // Operands[ MI->getNumOperands() - 3 ]. + // oper is a signed int. + // It is ok if "MI->getNumOperands()-3" is -3, -2, or -1. + for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) + { + MachineOperand &MO = MI->getOperand(oper); + + // Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4] Opnd[5] Opnd[6] Opnd[7] + // <Def0> <Def1> <Use0> <Use1> <ImpDef0> <ImpDef1> <ImpUse0> <ImpUse1> + // /\~ + // /||\~ + // || + // Predicate Operand here + if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) { + break; + } + if (MO.isReg()) { + MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(), + MO.isImplicit(), MO.isKill(), + MO.isDead(), MO.isUndef(), + MO.isDebug()); + } + else if (MO.isImm()) { + MI->getOperand(oper+1).ChangeToImmediate(MO.getImm()); + } + else if (MO.isGlobal()) { + // MI can not have more than one GlobalAddress operand. + assert(hasGAOpnd == false && "MI can only have one GlobalAddress opnd"); + + // There is no member function called "ChangeToGlobalAddress" in the + // MachineOperand class (not like "ChangeToRegister" and + // "ChangeToImmediate"). So we have to remove them from Operands[] list + // first, and then add them back after we have inserted the predicate + // operand. 
tmpOpnds[] is to remember these operands before we remove + // them. + tmpOpnds.push_back(MO); + + // Operands[oper] is a GlobalAddress operand; + // Operands[oper+1] has been copied into Operands[oper+2]; + hasGAOpnd = true; + GAIdx = oper; + continue; + } + else { + assert(false && "Unexpected operand type"); + } + } + } + + int regPos = invertJump ? 1 : 0; + MachineOperand PredMO = Cond[regPos]; + + // [oper] now points to the last explicit Def. Predicate operand must be + // located at [oper+1]. See diagram above. + // This assumes that the predicate is always the first operand, + // i.e. Operands[0+numResults], in the set of inputs + // It is better to have an assert here to check this. But I don't know how + // to write this assert because findFirstPredOperandIdx() would return -1 + if (oper < -1) oper = -1; + + MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(), + PredMO.isImplicit(), false, + PredMO.isDead(), PredMO.isUndef(), + PredMO.isDebug()); + + MachineRegisterInfo &RegInfo = MI->getParent()->getParent()->getRegInfo(); + RegInfo.clearKillFlags(PredMO.getReg()); + + if (hasGAOpnd) + { + unsigned int i; + + // Operands[GAIdx] is the original GlobalAddress operand, which is + // already copied into tmpOpnds[0]. + // Operands[GAIdx] now stores a copy of Operands[GAIdx-1] + // Operands[GAIdx+1] has already been copied into Operands[GAIdx+2], + // so we start from [GAIdx+2] + for (i = GAIdx + 2; i < MI->getNumOperands(); ++i) + tmpOpnds.push_back(MI->getOperand(i)); + + // Remove all operands in range [ (GAIdx+1) ... (MI->getNumOperands()-1) ] + // It is very important that we always remove from the end of Operands[] + // MI->getNumOperands() is at least 2 if program goes to here. + for (i = MI->getNumOperands() - 1; i > GAIdx; --i) + MI->RemoveOperand(i); + + for (i = 0; i < tmpOpnds.size(); ++i) + MI->addOperand(tmpOpnds[i]); + } + + return true; +} + + +bool +HexagonInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, + unsigned ExtraPredCycles, + const BranchProbability &Probability) const { + return true; +} + + +bool +HexagonInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, + unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, + unsigned ExtraFCycles, + const BranchProbability &Probability) const { + return true; +} + +// Returns true if an instruction is predicated irrespective of the predicate +// sense. For example, all of the following will return true. +// if (p0) R1 = add(R2, R3) +// if (!p0) R1 = add(R2, R3) +// if (p0.new) R1 = add(R2, R3) +// if (!p0.new) R1 = add(R2, R3) +bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + + return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); +} + +bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + + return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); +} + +bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + + assert(isPredicated(MI)); + return (!((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask)); +} + +bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + + // Make sure that the instruction is predicated. 
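+  // (These predicates all decode bit-fields of MCInstrDesc::TSFlags; the
+  // positions and masks come from MCTargetDesc/HexagonBaseInfo.h.)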
+ assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); + return (!((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask)); +} + +bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + + assert(isPredicated(MI)); + return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); +} + +bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + + assert(isPredicated(Opcode)); + return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); +} + +// Returns true, if a ST insn can be promoted to a new-value store. +bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const { + const HexagonRegisterInfo& QRI = getRegisterInfo(); + const uint64_t F = MI->getDesc().TSFlags; + + return ((F >> HexagonII::mayNVStorePos) & + HexagonII::mayNVStoreMask & + QRI.Subtarget.hasV4TOps()); +} + +bool +HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) { + MachineOperand MO = MI->getOperand(oper); + if (MO.isReg() && MO.isDef()) { + const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg()); + if (RC == &Hexagon::PredRegsRegClass) { + Pred.push_back(MO); + return true; + } + } + } + return false; +} + + +bool +HexagonInstrInfo:: +SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + // TODO: Fix this + return false; +} + + +// +// We indicate that we want to reverse the branch by +// inserting a 0 at the beginning of the Cond vector. +// +bool HexagonInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { + Cond.erase(Cond.begin()); + } else { + Cond.insert(Cond.begin(), MachineOperand::CreateImm(0)); + } + return false; +} + + +bool HexagonInstrInfo:: +isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs, + const BranchProbability &Probability) const { + return (NumInstrs <= 4); +} + +bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::DEALLOC_RET_V4 : + case Hexagon::DEALLOC_RET_cPt_V4 : + case Hexagon::DEALLOC_RET_cNotPt_V4 : + case Hexagon::DEALLOC_RET_cdnPnt_V4 : + case Hexagon::DEALLOC_RET_cNotdnPnt_V4 : + case Hexagon::DEALLOC_RET_cdnPt_V4 : + case Hexagon::DEALLOC_RET_cNotdnPt_V4 : + return true; + } +} + + +bool HexagonInstrInfo:: +isValidOffset(const int Opcode, const int Offset) const { + // This function is to check whether the "Offset" is in the correct range of + // the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is + // inserted to calculate the final address. Due to this reason, the function + // assumes that the "Offset" has correct alignment. + // We used to assert if the offset was not properly aligned, however, + // there are cases where a misaligned pointer recast can cause this + // problem, and we need to allow for it. The front end warns of such + // misaligns with respect to load size. 
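+  // For example, a word load/store accepts the signed MEMW range defined
+  // above (-4096..4095), while ADD_ri accepts the full signed 16-bit
+  // immediate range.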
+
+  switch (Opcode) {
+
+  case Hexagon::LDriw:
+  case Hexagon::LDriw_indexed:
+  case Hexagon::LDriw_f:
+  case Hexagon::STriw_indexed:
+  case Hexagon::STriw:
+  case Hexagon::STriw_f:
+    return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
+           (Offset <= Hexagon_MEMW_OFFSET_MAX);
+
+  case Hexagon::LDrid:
+  case Hexagon::LDrid_indexed:
+  case Hexagon::LDrid_f:
+  case Hexagon::STrid:
+  case Hexagon::STrid_indexed:
+  case Hexagon::STrid_f:
+    return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
+           (Offset <= Hexagon_MEMD_OFFSET_MAX);
+
+  case Hexagon::LDrih:
+  case Hexagon::LDriuh:
+  case Hexagon::STrih:
+    return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
+           (Offset <= Hexagon_MEMH_OFFSET_MAX);
+
+  case Hexagon::LDrib:
+  case Hexagon::STrib:
+  case Hexagon::LDriub:
+    return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
+           (Offset <= Hexagon_MEMB_OFFSET_MAX);
+
+  case Hexagon::ADD_ri:
+  case Hexagon::TFR_FI:
+    return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
+           (Offset <= Hexagon_ADDI_OFFSET_MAX);
+
+  case Hexagon::MemOPw_ADDi_V4:
+  case Hexagon::MemOPw_SUBi_V4:
+  case Hexagon::MemOPw_ADDr_V4:
+  case Hexagon::MemOPw_SUBr_V4:
+  case Hexagon::MemOPw_ANDr_V4:
+  case Hexagon::MemOPw_ORr_V4:
+    return (0 <= Offset && Offset <= 255);
+
+  case Hexagon::MemOPh_ADDi_V4:
+  case Hexagon::MemOPh_SUBi_V4:
+  case Hexagon::MemOPh_ADDr_V4:
+  case Hexagon::MemOPh_SUBr_V4:
+  case Hexagon::MemOPh_ANDr_V4:
+  case Hexagon::MemOPh_ORr_V4:
+    return (0 <= Offset && Offset <= 127);
+
+  case Hexagon::MemOPb_ADDi_V4:
+  case Hexagon::MemOPb_SUBi_V4:
+  case Hexagon::MemOPb_ADDr_V4:
+  case Hexagon::MemOPb_SUBr_V4:
+  case Hexagon::MemOPb_ANDr_V4:
+  case Hexagon::MemOPb_ORr_V4:
+    return (0 <= Offset && Offset <= 63);
+
+  // LDriw_pred and STriw_pred are pseudo operations, so they have to accept
+  // offsets of any size. A later pass knows how to handle them.
+  case Hexagon::STriw_pred:
+  case Hexagon::LDriw_pred:
+    return true;
+
+  case Hexagon::LOOP0_i:
+    return isUInt<10>(Offset);
+
+  // INLINEASM is very special.
+  case Hexagon::INLINEASM:
+    return true;
+  }
+
+  llvm_unreachable("No offset range is defined for this opcode. "
+                   "Please define it in the above switch statement!");
+}
+
+
+//
+// Check if the offset is a valid auto-increment immediate for the given
+// load/store type.
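+// The ranges below are consistent with a 4-bit signed increment scaled by
+// the access size: -8..7 double-words (-64..56), words (-32..28), halfwords
+// (-16..14), and bytes (-8..7).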
+// +bool HexagonInstrInfo:: +isValidAutoIncImm(const EVT VT, const int Offset) const { + + if (VT == MVT::i64) { + return (Offset >= Hexagon_MEMD_AUTOINC_MIN && + Offset <= Hexagon_MEMD_AUTOINC_MAX && + (Offset & 0x7) == 0); + } + if (VT == MVT::i32) { + return (Offset >= Hexagon_MEMW_AUTOINC_MIN && + Offset <= Hexagon_MEMW_AUTOINC_MAX && + (Offset & 0x3) == 0); + } + if (VT == MVT::i16) { + return (Offset >= Hexagon_MEMH_AUTOINC_MIN && + Offset <= Hexagon_MEMH_AUTOINC_MAX && + (Offset & 0x1) == 0); + } + if (VT == MVT::i8) { + return (Offset >= Hexagon_MEMB_AUTOINC_MIN && + Offset <= Hexagon_MEMB_AUTOINC_MAX); + } + llvm_unreachable("Not an auto-inc opc!"); +} + + +bool HexagonInstrInfo:: +isMemOp(const MachineInstr *MI) const { +// return MI->getDesc().mayLoad() && MI->getDesc().mayStore(); + + switch (MI->getOpcode()) + { + default: return false; + case Hexagon::MemOPw_ADDi_V4 : + case Hexagon::MemOPw_SUBi_V4 : + case Hexagon::MemOPw_ADDr_V4 : + case Hexagon::MemOPw_SUBr_V4 : + case Hexagon::MemOPw_ANDr_V4 : + case Hexagon::MemOPw_ORr_V4 : + case Hexagon::MemOPh_ADDi_V4 : + case Hexagon::MemOPh_SUBi_V4 : + case Hexagon::MemOPh_ADDr_V4 : + case Hexagon::MemOPh_SUBr_V4 : + case Hexagon::MemOPh_ANDr_V4 : + case Hexagon::MemOPh_ORr_V4 : + case Hexagon::MemOPb_ADDi_V4 : + case Hexagon::MemOPb_SUBi_V4 : + case Hexagon::MemOPb_ADDr_V4 : + case Hexagon::MemOPb_SUBr_V4 : + case Hexagon::MemOPb_ANDr_V4 : + case Hexagon::MemOPb_ORr_V4 : + case Hexagon::MemOPb_SETBITi_V4: + case Hexagon::MemOPh_SETBITi_V4: + case Hexagon::MemOPw_SETBITi_V4: + case Hexagon::MemOPb_CLRBITi_V4: + case Hexagon::MemOPh_CLRBITi_V4: + case Hexagon::MemOPw_CLRBITi_V4: + return true; + } + return false; +} + + +bool HexagonInstrInfo:: +isSpillPredRegOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::STriw_pred : + case Hexagon::LDriw_pred : + return true; + } +} + +bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::CMPEQrr: + case Hexagon::CMPEQri: + case Hexagon::CMPGTrr: + case Hexagon::CMPGTri: + case Hexagon::CMPGTUrr: + case Hexagon::CMPGTUri: + return true; + } +} + +bool HexagonInstrInfo:: +isConditionalTransfer (const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::TFR_cPt: + case Hexagon::TFR_cNotPt: + case Hexagon::TFRI_cPt: + case Hexagon::TFRI_cNotPt: + case Hexagon::TFR_cdnPt: + case Hexagon::TFR_cdnNotPt: + case Hexagon::TFRI_cdnPt: + case Hexagon::TFRI_cdnNotPt: + return true; + } +} + +bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { + const HexagonRegisterInfo& QRI = getRegisterInfo(); + switch (MI->getOpcode()) + { + default: return false; + case Hexagon::ADD_ri_cPt: + case Hexagon::ADD_ri_cNotPt: + case Hexagon::ADD_rr_cPt: + case Hexagon::ADD_rr_cNotPt: + case Hexagon::XOR_rr_cPt: + case Hexagon::XOR_rr_cNotPt: + case Hexagon::AND_rr_cPt: + case Hexagon::AND_rr_cNotPt: + case Hexagon::OR_rr_cPt: + case Hexagon::OR_rr_cNotPt: + case Hexagon::SUB_rr_cPt: + case Hexagon::SUB_rr_cNotPt: + case Hexagon::COMBINE_rr_cPt: + case Hexagon::COMBINE_rr_cNotPt: + return true; + case Hexagon::ASLH_cPt_V4: + case Hexagon::ASLH_cNotPt_V4: + case Hexagon::ASRH_cPt_V4: + case Hexagon::ASRH_cNotPt_V4: + case Hexagon::SXTB_cPt_V4: + case Hexagon::SXTB_cNotPt_V4: + case Hexagon::SXTH_cPt_V4: + case Hexagon::SXTH_cNotPt_V4: + case Hexagon::ZXTB_cPt_V4: + case Hexagon::ZXTB_cNotPt_V4: + case 
Hexagon::ZXTH_cPt_V4:
+  case Hexagon::ZXTH_cNotPt_V4:
+    return QRI.Subtarget.hasV4TOps();
+  }
+}
+
+bool HexagonInstrInfo::
+isConditionalLoad(const MachineInstr* MI) const {
+  const HexagonRegisterInfo& QRI = getRegisterInfo();
+  switch (MI->getOpcode())
+  {
+  default: return false;
+  case Hexagon::LDrid_cPt:
+  case Hexagon::LDrid_cNotPt:
+  case Hexagon::LDrid_indexed_cPt:
+  case Hexagon::LDrid_indexed_cNotPt:
+  case Hexagon::LDriw_cPt:
+  case Hexagon::LDriw_cNotPt:
+  case Hexagon::LDriw_indexed_cPt:
+  case Hexagon::LDriw_indexed_cNotPt:
+  case Hexagon::LDrih_cPt:
+  case Hexagon::LDrih_cNotPt:
+  case Hexagon::LDrih_indexed_cPt:
+  case Hexagon::LDrih_indexed_cNotPt:
+  case Hexagon::LDrib_cPt:
+  case Hexagon::LDrib_cNotPt:
+  case Hexagon::LDrib_indexed_cPt:
+  case Hexagon::LDrib_indexed_cNotPt:
+  case Hexagon::LDriuh_cPt:
+  case Hexagon::LDriuh_cNotPt:
+  case Hexagon::LDriuh_indexed_cPt:
+  case Hexagon::LDriuh_indexed_cNotPt:
+  case Hexagon::LDriub_cPt:
+  case Hexagon::LDriub_cNotPt:
+  case Hexagon::LDriub_indexed_cPt:
+  case Hexagon::LDriub_indexed_cNotPt:
+    return true;
+  case Hexagon::POST_LDrid_cPt:
+  case Hexagon::POST_LDrid_cNotPt:
+  case Hexagon::POST_LDriw_cPt:
+  case Hexagon::POST_LDriw_cNotPt:
+  case Hexagon::POST_LDrih_cPt:
+  case Hexagon::POST_LDrih_cNotPt:
+  case Hexagon::POST_LDrib_cPt:
+  case Hexagon::POST_LDrib_cNotPt:
+  case Hexagon::POST_LDriuh_cPt:
+  case Hexagon::POST_LDriuh_cNotPt:
+  case Hexagon::POST_LDriub_cPt:
+  case Hexagon::POST_LDriub_cNotPt:
+    return QRI.Subtarget.hasV4TOps();
+  case Hexagon::LDrid_indexed_shl_cPt_V4:
+  case Hexagon::LDrid_indexed_shl_cNotPt_V4:
+  case Hexagon::LDrib_indexed_shl_cPt_V4:
+  case Hexagon::LDrib_indexed_shl_cNotPt_V4:
+  case Hexagon::LDriub_indexed_shl_cPt_V4:
+  case Hexagon::LDriub_indexed_shl_cNotPt_V4:
+  case Hexagon::LDrih_indexed_shl_cPt_V4:
+  case Hexagon::LDrih_indexed_shl_cNotPt_V4:
+  case Hexagon::LDriuh_indexed_shl_cPt_V4:
+  case Hexagon::LDriuh_indexed_shl_cNotPt_V4:
+  case Hexagon::LDriw_indexed_shl_cPt_V4:
+  case Hexagon::LDriw_indexed_shl_cNotPt_V4:
+    return QRI.Subtarget.hasV4TOps();
+  }
+}
+
+// Returns true if an instruction is a conditional store.
+//
+// Note: It doesn't include conditional new-value stores, as they can't be
+// converted to .new predicate.
+//
+//          p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ]
+//                ^           ^
+//               /             \ (not OK: it would make the new-value store
+//              /               X conditional on p0.new while the R2 producer
+//             /                 \ is conditional on p0)
+//            /                   \
+//     p.new store                 p.old NV store
+// [if(p0.new)memw(R0+#0)=R2]   [if(p0)memw(R0+#0)=R2.new]
+//            ^                    ^
+//             \                  /
+//              \                /
+//               \              /
+//                 p.old store
+//          [if (p0)memw(R0+#0)=R2]
+//
+// The above diagram shows the steps involved in the conversion of a
+// predicated store instruction to its .new predicated new-value form.
+//
+// The following set of instructions further explains the scenario where a
+// conditional new-value store becomes invalid when promoted to .new
+// predicate form.
+//
+// { 1) if (p0) r0 = add(r1, r2)
+//   2) p0 = cmp.eq(r3, #0) }
+//
+//   3) if (p0) memb(r1+#0) = r0  --> this instruction can't be grouped with
+// the first two instructions because in instr 1, r0 is conditional on the
+// old value of p0, but its use in instr 3 is conditional on the p0 modified
+// by instr 2, which is not valid for new-value stores.
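The rule in the comment above can be modeled outside the backend in a few
lines. The sketch below is illustrative only and not part of this commit:
the types and the helper name are hypothetical, and the real packetizer
reasons about MachineInstr bundles rather than this simplified linear view.
It captures the one invalid edge in the diagram, namely that the value
producer and the store must observe the same value of the guarding
predicate.

    namespace {
    // Hypothetical, deliberately simplified view of one slot in a packet.
    struct PacketInsn {
      bool DefsP0;      // writes predicate p0 (like instr 2 above)
      bool GuardedOnP0; // execution is guarded by p0
    };

    // Illustrative check only: a conditional store may be promoted to its
    // .new predicated form only if no instruction between the value
    // producer and the store redefines the guarding predicate, so that
    // both read the same value of p0.
    bool mayPromoteToDotNew(const PacketInsn Insns[], int Producer,
                            int Store) {
      for (int I = Producer + 1; I < Store; ++I)
        if (Insns[I].DefsP0)
          return false;
      return Insns[Producer].GuardedOnP0 && Insns[Store].GuardedOnP0;
    }
    } // end anonymous namespace

The function that follows simply enumerates the conditional-store opcodes to
which this rule applies.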
+bool HexagonInstrInfo:: +isConditionalStore (const MachineInstr* MI) const { + const HexagonRegisterInfo& QRI = getRegisterInfo(); + switch (MI->getOpcode()) + { + default: return false; + case Hexagon::STrib_imm_cPt_V4 : + case Hexagon::STrib_imm_cNotPt_V4 : + case Hexagon::STrib_indexed_shl_cPt_V4 : + case Hexagon::STrib_indexed_shl_cNotPt_V4 : + case Hexagon::STrib_cPt : + case Hexagon::STrib_cNotPt : + case Hexagon::POST_STbri_cPt : + case Hexagon::POST_STbri_cNotPt : + case Hexagon::STrid_indexed_cPt : + case Hexagon::STrid_indexed_cNotPt : + case Hexagon::STrid_indexed_shl_cPt_V4 : + case Hexagon::POST_STdri_cPt : + case Hexagon::POST_STdri_cNotPt : + case Hexagon::STrih_cPt : + case Hexagon::STrih_cNotPt : + case Hexagon::STrih_indexed_cPt : + case Hexagon::STrih_indexed_cNotPt : + case Hexagon::STrih_imm_cPt_V4 : + case Hexagon::STrih_imm_cNotPt_V4 : + case Hexagon::STrih_indexed_shl_cPt_V4 : + case Hexagon::STrih_indexed_shl_cNotPt_V4 : + case Hexagon::POST_SThri_cPt : + case Hexagon::POST_SThri_cNotPt : + case Hexagon::STriw_cPt : + case Hexagon::STriw_cNotPt : + case Hexagon::STriw_indexed_cPt : + case Hexagon::STriw_indexed_cNotPt : + case Hexagon::STriw_imm_cPt_V4 : + case Hexagon::STriw_imm_cNotPt_V4 : + case Hexagon::STriw_indexed_shl_cPt_V4 : + case Hexagon::STriw_indexed_shl_cNotPt_V4 : + case Hexagon::POST_STwri_cPt : + case Hexagon::POST_STwri_cNotPt : + return QRI.Subtarget.hasV4TOps(); + + // V4 global address store before promoting to dot new. + case Hexagon::STd_GP_cPt_V4 : + case Hexagon::STd_GP_cNotPt_V4 : + case Hexagon::STb_GP_cPt_V4 : + case Hexagon::STb_GP_cNotPt_V4 : + case Hexagon::STh_GP_cPt_V4 : + case Hexagon::STh_GP_cNotPt_V4 : + case Hexagon::STw_GP_cPt_V4 : + case Hexagon::STw_GP_cNotPt_V4 : + return QRI.Subtarget.hasV4TOps(); + + // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded + // from the "Conditional Store" list. Because a predicated new value store + // would NOT be promoted to a double dot new store. See diagram below: + // This function returns yes for those stores that are predicated but not + // yet promoted to predicate dot new instructions. + // + // +---------------------+ + // /-----| if (p0) memw(..)=r0 |---------\~ + // || +---------------------+ || + // promote || /\ /\ || promote + // || /||\ /||\ || + // \||/ demote || \||/ + // \/ || || \/ + // +-------------------------+ || +-------------------------+ + // | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new | + // +-------------------------+ || +-------------------------+ + // || || || + // || demote \||/ + // promote || \/ NOT possible + // || || /\~ + // \||/ || /||\~ + // \/ || || + // +-----------------------------+ + // | if (p0.new) memw(..)=r0.new | + // +-----------------------------+ + // Double Dot New Store + // + } +} + + +bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { + if (isNewValue(MI) && isBranch(MI)) + return true; + return false; +} + +bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const { + return (getAddrMode(MI) == HexagonII::PostInc); +} + +bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + +// Returns true, if any one of the operands is a dot new +// insn, whether it is predicated dot new or register dot new. 
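Like most of the is*() predicates in this file, both checks reduce to
single-bit tests on MCInstrDesc::TSFlags. Here is a self-contained sketch of
that pattern; the bit positions are invented for illustration (the real ones
are the HexagonII constants from HexagonBaseInfo.h), and isDotNew() is a
simplified analogue of the function implemented next.

    #include <stdint.h>

    // Hypothetical field layout, for illustration only.
    enum {
      NewValuePos      = 26, NewValueMask      = 1,
      PredicatedPos    = 27, PredicatedMask    = 1,
      PredicatedNewPos = 28, PredicatedNewMask = 1
    };

    static inline bool testField(uint64_t TSFlags, unsigned Pos,
                                 unsigned Mask) {
      return (TSFlags >> Pos) & Mask; // shift the field down, mask it out
    }

    // Either a new-value instruction, or a predicated instruction in its
    // .new predicate form.
    static inline bool isDotNew(uint64_t TSFlags) {
      return testField(TSFlags, NewValuePos, NewValueMask) ||
             (testField(TSFlags, PredicatedPos, PredicatedMask) &&
              testField(TSFlags, PredicatedNewPos, PredicatedNewMask));
    }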
+bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const {
+  return (isNewValueInst(MI) ||
+          (isPredicated(MI) && isPredicatedNew(MI)));
+}
+
+// Returns the most basic instruction for the .new predicated instructions
+// and new-value stores.
+// For example, all of the following instructions will be converted back to
+// the same instruction:
+// 1) if (p0.new) memw(R0+#0) = R1.new  --->
+// 2) if (p0) memw(R0+#0) = R1.new      ---> if (p0) memw(R0+#0) = R1
+// 3) if (p0.new) memw(R0+#0) = R1      --->
+//
+int HexagonInstrInfo::GetDotOldOp(const int opc) const {
+  int NewOp = opc;
+  if (isPredicated(NewOp) && isPredicatedNew(NewOp)) {
+    // Get the predicate old form.
+    NewOp = Hexagon::getPredOldOpcode(NewOp);
+    assert(NewOp >= 0 &&
+           "Couldn't change predicate new instruction to its old form.");
+  }
+
+  if (isNewValueStore(NewOp)) { // Convert into non-new-value format.
+    NewOp = Hexagon::getNonNVStore(NewOp);
+    assert(NewOp >= 0 && "Couldn't change new-value store to its old form.");
+  }
+  return NewOp;
+}
+
+// Return the new-value instruction for a given store.
+int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
+  int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode());
+  if (NVOpcode >= 0) // Valid new-value store instruction.
+    return NVOpcode;
+
+  switch (MI->getOpcode()) {
+  default: llvm_unreachable("Unknown .new type");
+  // Store new-value byte.
+  case Hexagon::STrib_shl_V4:
+    return Hexagon::STrib_shl_nv_V4;
+
+  case Hexagon::STrih_shl_V4:
+    return Hexagon::STrih_shl_nv_V4;
+
+  case Hexagon::STriw_f:
+    return Hexagon::STriw_nv_V4;
+
+  case Hexagon::STriw_indexed_f:
+    return Hexagon::STriw_indexed_nv_V4;
+
+  case Hexagon::STriw_shl_V4:
+    return Hexagon::STriw_shl_nv_V4;
+  }
+  return 0;
+}
+
+// Return the .new predicate version for an instruction.
+int HexagonInstrInfo::GetDotNewPredOp(MachineInstr *MI,
+                                      const MachineBranchProbabilityInfo
+                                      *MBPI) const {
+
+  int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode());
+  if (NewOpcode >= 0) // Valid predicate new instruction.
+    return NewOpcode;
+
+  switch (MI->getOpcode()) {
+  default: llvm_unreachable("Unknown .new type");
+  // Conditional jumps.
+  case Hexagon::JMP_t:
+  case Hexagon::JMP_f:
+    return getDotNewPredJumpOp(MI, MBPI);
+
+  case Hexagon::JMPR_t:
+    return Hexagon::JMPR_tnew_tV3;
+
+  case Hexagon::JMPR_f:
+    return Hexagon::JMPR_fnew_tV3;
+
+  case Hexagon::JMPret_t:
+    return Hexagon::JMPret_tnew_tV3;
+
+  case Hexagon::JMPret_f:
+    return Hexagon::JMPret_fnew_tV3;
+
+  // Conditional combine.
+  case Hexagon::COMBINE_rr_cPt:
+    return Hexagon::COMBINE_rr_cdnPt;
+  case Hexagon::COMBINE_rr_cNotPt:
+    return Hexagon::COMBINE_rr_cdnNotPt;
+  }
+}
+
+
+unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+
+  return ((F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask);
+}
+
+/// immediateExtend - Changes the instruction in place to one using an
+/// immediate extender.
+void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
+  assert((isExtendable(MI) || isConstExtended(MI)) &&
+         "Instruction must be extendable");
+  // Find which operand is extendable.
+  short ExtOpNum = getCExtOpNum(MI);
+  MachineOperand &MO = MI->getOperand(ExtOpNum);
+  // This needs to be something we understand.
+  assert((MO.isMBB() || MO.isImm()) &&
+         "Branch with unknown extendable field type");
+  // Mark the given operand as extended.
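+  // This is the same flag that isExtended() and isConstExtended() test, so
+  // later passes will treat the operand as constant-extended.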
+  MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
+}
+
+DFAPacketizer *HexagonInstrInfo::
+CreateTargetScheduleState(const TargetMachine *TM,
+                          const ScheduleDAG *DAG) const {
+  const InstrItineraryData *II = TM->getInstrItineraryData();
+  return TM->getSubtarget<HexagonGenSubtargetInfo>().createDFAPacketizer(II);
+}
+
+bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+                                            const MachineBasicBlock *MBB,
+                                            const MachineFunction &MF) const {
+  // Debug info is never a scheduling boundary. It's necessary to be explicit
+  // due to the special treatment of terminators below; otherwise a dbg_value
+  // followed by a terminator would make the terminator appear to be a
+  // scheduling hazard, which is wrong. The boundary should be the actual
+  // instruction preceding the dbg_value instruction(s), just like it is
+  // when debug info is not present.
+  if (MI->isDebugValue())
+    return false;
+
+  // Terminators and labels can't be scheduled around.
+  if (MI->getDesc().isTerminator() || MI->isLabel() || MI->isInlineAsm())
+    return true;
+
+  return false;
+}
+
+bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
+
+  // Constant extenders are allowed only for V4 and above.
+  if (!Subtarget.hasV4TOps())
+    return false;
+
+  const uint64_t F = MI->getDesc().TSFlags;
+  unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+  if (isExtended) // Instruction must be extended.
+    return true;
+
+  unsigned isExtendable = (F >> HexagonII::ExtendablePos)
+                          & HexagonII::ExtendableMask;
+  if (!isExtendable)
+    return false;
+
+  short ExtOpNum = getCExtOpNum(MI);
+  const MachineOperand &MO = MI->getOperand(ExtOpNum);
+  // Use MO operand flags to determine if MO
+  // has the HMOTF_ConstExtended flag set.
+  if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+    return true;
+  // If this is a Machine BB address we are talking about, and it is
+  // not marked as extended, say so.
+  if (MO.isMBB())
+    return false;
+
+  // We could be using an instruction with an extendable immediate and
+  // shoehorn a global address into it. If it is a global address it will be
+  // constant extended. We do this for COMBINE.
+  // We currently only handle isGlobal() because it is the only kind of
+  // object we are going to end up with here for now.
+  // In the future we probably should add isSymbol(), etc.
+  if (MO.isGlobal() || MO.isSymbol())
+    return true;
+
+  // If the extendable operand is not 'Immediate' type, the instruction
+  // should have the 'isExtended' flag set.
+  assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+  int MinValue = getMinValue(MI);
+  int MaxValue = getMaxValue(MI);
+  int ImmValue = MO.getImm();
+
+  return (ImmValue < MinValue || ImmValue > MaxValue);
+}
+
+// Returns the opcode to use when converting MI, which is a conditional jump,
+// into a conditional instruction which uses the .new value of the predicate.
+// We also use branch probabilities to add a hint to the jump.
+int
+HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI,
+                                      const
+                                      MachineBranchProbabilityInfo *MBPI) const {
+
+  // We assume that the block can have at most two successors.
+  bool taken = false;
+  MachineBasicBlock *Src = MI->getParent();
+  MachineOperand *BrTarget = &MI->getOperand(1);
+  MachineBasicBlock *Dst = BrTarget->getMBB();
+
+  const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst);
+  if (Prediction >= BranchProbability(1,2))
+    taken = true;
+
+  switch (MI->getOpcode()) {
+  case Hexagon::JMP_t:
+    return taken ?
Hexagon::JMP_tnew_t : Hexagon::JMP_tnew_nt;
+  case Hexagon::JMP_f:
+    return taken ? Hexagon::JMP_fnew_t : Hexagon::JMP_fnew_nt;
+
+  default:
+    llvm_unreachable("Unexpected jump instruction.");
+  }
+}
+
+// Returns true if a particular operand is extendable for an instruction.
+bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
+                                         unsigned short OperandNum) const {
+  // Constant extenders are allowed only for V4 and above.
+  if (!Subtarget.hasV4TOps())
+    return false;
+
+  const uint64_t F = MI->getDesc().TSFlags;
+
+  return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
+          == OperandNum;
+}
+
+// Returns the operand index for the constant-extended instruction.
+unsigned short HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+// Returns the min value that doesn't need to be extended.
+int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+                      & HexagonII::ExtentSignedMask;
+  unsigned bits = (F >> HexagonII::ExtentBitsPos)
+                  & HexagonII::ExtentBitsMask;
+
+  if (isSigned) // if value is signed
+    return -1 << (bits - 1);
+  else
+    return 0;
+}
+
+// Returns the max value that doesn't need to be extended.
+int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+                      & HexagonII::ExtentSignedMask;
+  unsigned bits = (F >> HexagonII::ExtentBitsPos)
+                  & HexagonII::ExtentBitsMask;
+
+  if (isSigned) // if value is signed
+    return ~(-1 << (bits - 1));
+  else
+    return ~(-1 << bits);
+}
+
+// Returns true if an instruction can be converted into a non-extended
+// equivalent instruction.
+bool HexagonInstrInfo::NonExtEquivalentExists(const MachineInstr *MI) const {
+
+  short NonExtOpcode;
+  // Check if the instruction has a register form that uses a register in
+  // place of the extended operand; if so, return that as the non-extended
+  // form.
+  if (Hexagon::getRegForm(MI->getOpcode()) >= 0)
+    return true;
+
+  if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+    // Check the addressing mode and retrieve the non-extended equivalent
+    // instruction.
+    switch (getAddrMode(MI)) {
+    case HexagonII::Absolute:
+      // A load/store with absolute addressing mode can be converted into
+      // base+offset mode.
+      NonExtOpcode = Hexagon::getBasedWithImmOffset(MI->getOpcode());
+      break;
+    case HexagonII::BaseImmOffset:
+      // A load/store with base+offset addressing mode can be converted into
+      // base+register-offset addressing mode, but the left-shift operand
+      // should be set to 0.
+      NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode());
+      break;
+    default:
+      return false;
+    }
+    if (NonExtOpcode < 0)
+      return false;
+    return true;
+  }
+  return false;
+}
+
+// Returns the opcode of the non-extended equivalent instruction.
+short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const {
+
+  // Check if the instruction has a register form that uses a register in
+  // place of the extended operand; if so, return that as the non-extended
+  // form.
+  short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode());
+  if (NonExtOpcode >= 0)
+    return NonExtOpcode;
+
+  if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+    // Check the addressing mode and retrieve the non-extended equivalent
+    // instruction.
+ switch (getAddrMode(MI)) { + case HexagonII::Absolute : + return Hexagon::getBasedWithImmOffset(MI->getOpcode()); + case HexagonII::BaseImmOffset : + return Hexagon::getBaseWithRegOffset(MI->getOpcode()); + default: + return -1; + } + } + return -1; +} + +bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const { + return (Opcode == Hexagon::JMP_t) || + (Opcode == Hexagon::JMP_f) || + (Opcode == Hexagon::JMP_tnew_t) || + (Opcode == Hexagon::JMP_fnew_t) || + (Opcode == Hexagon::JMP_tnew_nt) || + (Opcode == Hexagon::JMP_fnew_nt); +} + +bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const { + return (Opcode == Hexagon::JMP_f) || + (Opcode == Hexagon::JMP_fnew_t) || + (Opcode == Hexagon::JMP_fnew_nt); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h new file mode 100644 index 000000000000..3f45b8b2986e --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -0,0 +1,223 @@ +//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef HexagonINSTRUCTIONINFO_H +#define HexagonINSTRUCTIONINFO_H + +#include "HexagonRegisterInfo.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" + +#define GET_INSTRINFO_HEADER +#include "HexagonGenInstrInfo.inc" + +namespace llvm { + +class HexagonInstrInfo : public HexagonGenInstrInfo { + virtual void anchor(); + const HexagonRegisterInfo RI; + const HexagonSubtarget &Subtarget; + typedef unsigned Opcode_t; + +public: + explicit HexagonInstrInfo(HexagonSubtarget &ST); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const HexagonRegisterInfo &getRegisterInfo() const { return RI; } + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. 
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + + virtual bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const; + + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + + virtual bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; + } + + unsigned createVR(MachineFunction* MF, MVT VT) const; + + virtual bool isBranch(const MachineInstr *MI) const; + virtual bool isPredicable(MachineInstr *MI) const; + virtual bool + PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Cond) const; + + virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + unsigned ExtraPredCycles, + const BranchProbability &Probability) const; + + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, unsigned ExtraFCycles, + const BranchProbability &Probability) const; + + virtual bool isPredicated(const MachineInstr *MI) const; + virtual bool isPredicated(unsigned Opcode) const; + virtual bool isPredicatedTrue(const MachineInstr *MI) const; + virtual bool isPredicatedTrue(unsigned Opcode) const; + virtual bool isPredicatedNew(const MachineInstr *MI) const; + virtual bool isPredicatedNew(unsigned Opcode) const; + virtual bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const; + virtual bool + SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const; + + virtual bool + ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + + virtual bool + isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles, + const BranchProbability &Probability) const; + + virtual DFAPacketizer* + CreateTargetScheduleState(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + + virtual bool 
isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const; + bool isValidOffset(const int Opcode, const int Offset) const; + bool isValidAutoIncImm(const EVT VT, const int Offset) const; + bool isMemOp(const MachineInstr *MI) const; + bool isSpillPredRegOp(const MachineInstr *MI) const; + bool isU6_3Immediate(const int value) const; + bool isU6_2Immediate(const int value) const; + bool isU6_1Immediate(const int value) const; + bool isU6_0Immediate(const int value) const; + bool isS4_3Immediate(const int value) const; + bool isS4_2Immediate(const int value) const; + bool isS4_1Immediate(const int value) const; + bool isS4_0Immediate(const int value) const; + bool isS12_Immediate(const int value) const; + bool isU6_Immediate(const int value) const; + bool isS8_Immediate(const int value) const; + bool isS6_Immediate(const int value) const; + + bool isSaveCalleeSavedRegsCall(const MachineInstr* MI) const; + bool isConditionalTransfer(const MachineInstr* MI) const; + bool isConditionalALU32 (const MachineInstr* MI) const; + bool isConditionalLoad (const MachineInstr* MI) const; + bool isConditionalStore(const MachineInstr* MI) const; + bool isNewValueInst(const MachineInstr* MI) const; + bool isNewValue(const MachineInstr* MI) const; + bool isDotNewInst(const MachineInstr* MI) const; + int GetDotOldOp(const int opc) const; + int GetDotNewOp(const MachineInstr* MI) const; + int GetDotNewPredOp(MachineInstr *MI, + const MachineBranchProbabilityInfo + *MBPI) const; + bool mayBeNewStore(const MachineInstr* MI) const; + bool isDeallocRet(const MachineInstr *MI) const; + unsigned getInvertedPredicatedOpcode(const int Opc) const; + bool isExtendable(const MachineInstr* MI) const; + bool isExtended(const MachineInstr* MI) const; + bool isPostIncrement(const MachineInstr* MI) const; + bool isNewValueStore(const MachineInstr* MI) const; + bool isNewValueStore(unsigned Opcode) const; + bool isNewValueJump(const MachineInstr* MI) const; + bool isNewValueJumpCandidate(const MachineInstr *MI) const; + + + void immediateExtend(MachineInstr *MI) const; + bool isConstExtended(MachineInstr *MI) const; + int getDotNewPredJumpOp(MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const; + unsigned getAddrMode(const MachineInstr* MI) const; + bool isOperandExtended(const MachineInstr *MI, + unsigned short OperandNum) const; + unsigned short getCExtOpNum(const MachineInstr *MI) const; + int getMinValue(const MachineInstr *MI) const; + int getMaxValue(const MachineInstr *MI) const; + bool NonExtEquivalentExists (const MachineInstr *MI) const; + short getNonExtOpcode(const MachineInstr *MI) const; + bool PredOpcodeHasJMP_c(Opcode_t Opcode) const; + bool PredOpcodeHasNot(Opcode_t Opcode) const; + +private: + int getMatchingCondBranchOpcode(int Opc, bool sense) const; + +}; + +} + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td new file mode 100644 index 000000000000..c96aaca8f8d2 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td @@ -0,0 +1,2851 @@ +//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +include "HexagonInstrFormats.td" +include "HexagonOperands.td" + +//===----------------------------------------------------------------------===// + +// Multi-class for logical operators. +multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> { + def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$b), + (i32 IntRegs:$c)))]>; + def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")), + [(set (i32 IntRegs:$dst), (OpNode s10Imm:$b, + (i32 IntRegs:$c)))]>; +} + +// Multi-class for compare ops. +let isCompare = 1 in { +multiclass CMP64_rr<string OpcStr, PatFrag OpNode> { + def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set (i1 PredRegs:$dst), + (OpNode (i64 DoubleRegs:$b), (i64 DoubleRegs:$c)))]>; +} + +multiclass CMP32_rr_ri_s10<string OpcStr, string CextOp, PatFrag OpNode> { + let CextOpcode = CextOp in { + let InputType = "reg" in + def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set (i1 PredRegs:$dst), + (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>; + + let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, + opExtentBits = 10, InputType = "imm" in + def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Ext:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), + [(set (i1 PredRegs:$dst), + (OpNode (i32 IntRegs:$b), s10ExtPred:$c))]>; + } +} + +multiclass CMP32_rr_ri_u9<string OpcStr, string CextOp, PatFrag OpNode> { + let CextOpcode = CextOp in { + let InputType = "reg" in + def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set (i1 PredRegs:$dst), + (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>; + + let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, + opExtentBits = 9, InputType = "imm" in + def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Ext:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), + [(set (i1 PredRegs:$dst), + (OpNode (i32 IntRegs:$b), u9ExtPred:$c))]>; + } +} + +multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> { +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in + def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Ext:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), + [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b), + s8ExtPred:$c))]>; +} +} + +//===----------------------------------------------------------------------===// +// ALU32/ALU (Instructions with register-register form) +//===----------------------------------------------------------------------===// +def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, + [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; + +def HexagonWrapperCombineII : + SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>; + +def HexagonWrapperCombineRR : + SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>; + +multiclass ALU32_Pbase<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : ALU32_rr<(outs RC:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", 
+ ") $dst = ")#mnemonic#"($src2, $src3)", + []>; +} + +multiclass ALU32_Pred<string mnemonic, RegisterClass RC, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 0>; + // Predicate new + defm _cdn#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 1>; + } +} + +let InputType = "reg" in +multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_rr in { + let isPredicable = 1 in + def NAME : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = "#mnemonic#"($src1, $src2)", + [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + + let neverHasSideEffects = 1, isPredicated = 1 in { + defm Pt : ALU32_Pred<mnemonic, IntRegs, 0>; + defm NotPt : ALU32_Pred<mnemonic, IntRegs, 1>; + } + } +} + +let isCommutable = 1 in { + defm ADD_rr : ALU32_base<"add", "ADD", add>, ImmRegRel, PredNewRel; + defm AND_rr : ALU32_base<"and", "AND", and>, ImmRegRel, PredNewRel; + defm XOR_rr : ALU32_base<"xor", "XOR", xor>, ImmRegRel, PredNewRel; + defm OR_rr : ALU32_base<"or", "OR", or>, ImmRegRel, PredNewRel; +} + +defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel; + +// Combines the two integer registers SRC1 and SRC2 into a double register. +let isPredicable = 1 in +class T_Combine : ALU32_rr<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = combine($src1, $src2)", + [(set (i64 DoubleRegs:$dst), + (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1), + (i32 IntRegs:$src2))))]>; + +multiclass Combine_base { + let BaseOpcode = "combine" in { + def NAME : T_Combine; + let neverHasSideEffects = 1, isPredicated = 1 in { + defm Pt : ALU32_Pred<"combine", DoubleRegs, 0>; + defm NotPt : ALU32_Pred<"combine", DoubleRegs, 1>; + } + } +} + +defm COMBINE_rr : Combine_base, PredNewRel; + +// Combines the two immediates SRC1 and SRC2 into a double register. 
+class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> : + ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2), + "$dst = combine(#$src1, #$src2)", + [(set (i64 DoubleRegs:$dst), + (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>; + +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in +def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>; + +//===----------------------------------------------------------------------===// +// ALU32/ALU (ADD with register-immediate form) +//===----------------------------------------------------------------------===// +multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", + ") $dst = ")#mnemonic#"($src2, #$src3)", + []>; +} + +multiclass ALU32ri_Pred<string mnemonic, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>; + // Predicate new + defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>; + } +} + +let isExtendable = 1, InputType = "imm" in +multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_ri in { + let opExtendable = 2, isExtentSigned = 1, opExtentBits = 16, + isPredicable = 1 in + def NAME : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s16Ext:$src2), + "$dst = "#mnemonic#"($src1, #$src2)", + [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), + (s16ExtPred:$src2)))]>; + + let opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, + neverHasSideEffects = 1, isPredicated = 1 in { + defm Pt : ALU32ri_Pred<mnemonic, 0>; + defm NotPt : ALU32ri_Pred<mnemonic, 1>; + } + } +} + +defm ADD_ri : ALU32ri_base<"add", "ADD", add>, ImmRegRel, PredNewRel; + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10, +CextOpcode = "OR", InputType = "imm" in +def OR_ri : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s10Ext:$src2), + "$dst = or($src1, #$src2)", + [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), + s10ExtPred:$src2))]>, ImmRegRel; + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10, +InputType = "imm", CextOpcode = "AND" in +def AND_ri : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s10Ext:$src2), + "$dst = and($src1, #$src2)", + [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1), + s10ExtPred:$src2))]>, ImmRegRel; + +// Nop. +let neverHasSideEffects = 1 in +def NOP : ALU32_rr<(outs), (ins), + "nop", + []>; + +// Rd32=sub(#s10,Rs32) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 10, +CextOpcode = "SUB", InputType = "imm" in +def SUB_ri : ALU32_ri<(outs IntRegs:$dst), + (ins s10Ext:$src1, IntRegs:$src2), + "$dst = sub(#$src1, $src2)", + [(set IntRegs:$dst, (sub s10ExtPred:$src1, IntRegs:$src2))]>, + ImmRegRel; + +// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). +def : Pat<(not (i32 IntRegs:$src1)), + (SUB_ri -1, (i32 IntRegs:$src1))>; + +// Rd = neg(Rs) gets mapped to Rd=sub(#0, Rs). +// Pattern definition for 'neg' was not necessary. 
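The SUB_ri definition above does double duty: because its pattern matches (sub imm, reg), it also covers bitwise and arithmetic negation, which is why not(Rs) maps to sub(#-1, Rs) and no separate pattern is needed for neg(Rs). A minimal standalone C++ sketch (illustrative only, not part of the imported sources) of the two's-complement identities this relies on:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x : {0, 1, -1, 42, -1000000}) {
    assert(~x == -1 - x); // Rd = not(Rs)  ->  Rd = sub(#-1, Rs)
    assert(-x == 0 - x);  // Rd = neg(Rs)  ->  Rd = sub(#0, Rs)
  }
  return 0;
}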
+ +multiclass TFR_Pred<bit PredNot> { + let isPredicatedFalse = PredNot in { + def _c#NAME : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2", + []>; + // Predicate new + let isPredicatedNew = 1 in + def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2", + []>; + } +} + +let InputType = "reg", neverHasSideEffects = 1 in +multiclass TFR_base<string CextOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp in { + let isPredicable = 1 in + def NAME : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1", + []>; + + let isPredicated = 1 in { + defm Pt : TFR_Pred<0>; + defm NotPt : TFR_Pred<1>; + } + } +} + +class T_TFR64_Pred<bit PredNot, bit isPredNew> + : ALU32_rr<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2), + !if(PredNot, "if (!$src1", "if ($src1")# + !if(isPredNew, ".new) ", ") ")#"$dst = $src2", []> +{ + bits<5> dst; + bits<2> src1; + bits<5> src2; + + let IClass = 0b1111; + let Inst{27-24} = 0b1101; + let Inst{13} = isPredNew; + let Inst{7} = PredNot; + let Inst{4-0} = dst; + let Inst{6-5} = src1; + let Inst{20-17} = src2{4-1}; + let Inst{16} = 0b1; + let Inst{12-9} = src2{4-1}; + let Inst{8} = 0b0; +} + +multiclass TFR64_Pred<bit PredNot> { + let isPredicatedFalse = PredNot in { + def _c#NAME : T_TFR64_Pred<PredNot, 0>; + + let isPredicatedNew = 1 in + def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new + } +} + +let neverHasSideEffects = 1 in +multiclass TFR64_base<string BaseName> { + let BaseOpcode = BaseName in { + let isPredicable = 1 in + def NAME : ALU32Inst <(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1), + "$dst = $src1" > { + bits<5> dst; + bits<5> src1; + + let IClass = 0b1111; + let Inst{27-23} = 0b01010; + let Inst{4-0} = dst; + let Inst{20-17} = src1{4-1}; + let Inst{16} = 0b1; + let Inst{12-9} = src1{4-1}; + let Inst{8} = 0b0; + } + + let isPredicated = 1 in { + defm Pt : TFR64_Pred<0>; + defm NotPt : TFR64_Pred<1>; + } + } +} + +multiclass TFRI_Pred<bit PredNot> { + let isMoveImm = 1, isPredicatedFalse = PredNot in { + def _c#NAME : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, s12Ext:$src2), + !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2", + []>; + + // Predicate new + let isPredicatedNew = 1 in + def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, s12Ext:$src2), + !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2", + []>; + } +} + +let InputType = "imm", isExtendable = 1, isExtentSigned = 1 in +multiclass TFRI_base<string CextOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp#I in { + let isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, + isMoveImm = 1, isPredicable = 1, isReMaterializable = 1 in + def NAME : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), + "$dst = #$src1", + [(set (i32 IntRegs:$dst), s16ExtPred:$src1)]>; + + let opExtendable = 2, opExtentBits = 12, neverHasSideEffects = 1, + isPredicated = 1 in { + defm Pt : TFRI_Pred<0>; + defm NotPt : TFRI_Pred<1>; + } + } +} + +defm TFRI : TFRI_base<"TFR">, ImmRegRel, PredNewRel; +defm TFR : TFR_base<"TFR">, ImmRegRel, PredNewRel; +defm TFR64 : TFR64_base<"TFR64">, PredNewRel; + +// Transfer control register. 
+let neverHasSideEffects = 1 in +def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1", + []>; +//===----------------------------------------------------------------------===// +// ALU32/ALU - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU32/PERM + +//===----------------------------------------------------------------------===// + +let neverHasSideEffects = 1 in +def COMBINE_ii : ALU32_ii<(outs DoubleRegs:$dst), + (ins s8Imm:$src1, s8Imm:$src2), + "$dst = combine(#$src1, #$src2)", + []>; + +// Mux. +def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, + DoubleRegs:$src2, + DoubleRegs:$src3), + "$dst = vmux($src1, $src2, $src3)", + []>; + +let CextOpcode = "MUX", InputType = "reg" in +def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, + IntRegs:$src2, IntRegs:$src3), + "$dst = mux($src1, $src2, $src3)", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))]>, ImmRegRel; + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8, +CextOpcode = "MUX", InputType = "imm" in +def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2, + IntRegs:$src3), + "$dst = mux($src1, #$src2, $src3)", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 PredRegs:$src1), s8ExtPred:$src2, + (i32 IntRegs:$src3))))]>, ImmRegRel; + +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, +CextOpcode = "MUX", InputType = "imm" in +def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, + s8Ext:$src3), + "$dst = mux($src1, $src2, #$src3)", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2), + s8ExtPred:$src3)))]>, ImmRegRel; + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in +def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2, + s8Imm:$src3), + "$dst = mux($src1, #$src2, #$src3)", + [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1), + s8ExtPred:$src2, + s8ImmPred:$src3)))]>; + +// ALU32 - aslh, asrh, sxtb, sxth, zxtb, zxth +multiclass ALU32_2op_Pbase<string mnemonic, bit isNot, bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : ALU32Inst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", + ") $dst = ")#mnemonic#"($src2)">, + Requires<[HasV4T]>; +} + +multiclass ALU32_2op_Pred<string mnemonic, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 0>; + // Predicate new + defm _cdn#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 1>; + } +} + +multiclass ALU32_2op_base<string mnemonic> { + let BaseOpcode = mnemonic in { + let isPredicable = 1, neverHasSideEffects = 1 in + def NAME : ALU32Inst<(outs IntRegs:$dst), + (ins IntRegs:$src1), + "$dst = "#mnemonic#"($src1)">; + + let Predicates = [HasV4T], validSubTargets = HasV4SubT, isPredicated = 1, + neverHasSideEffects = 1 in { + defm Pt_V4 : ALU32_2op_Pred<mnemonic, 0>; + defm NotPt_V4 : ALU32_2op_Pred<mnemonic, 1>; + } + } +} + +defm ASLH : ALU32_2op_base<"aslh">, PredNewRel; +defm ASRH : ALU32_2op_base<"asrh">, PredNewRel; +defm SXTB : ALU32_2op_base<"sxtb">, PredNewRel; +defm SXTH : ALU32_2op_base<"sxth">, PredNewRel; +defm ZXTB : ALU32_2op_base<"zxtb">, PredNewRel; +defm ZXTH : ALU32_2op_base<"zxth">, PredNewRel; + +def 
: Pat <(shl (i32 IntRegs:$src1), (i32 16)), + (ASLH IntRegs:$src1)>; + +def : Pat <(sra (i32 IntRegs:$src1), (i32 16)), + (ASRH IntRegs:$src1)>; + +def : Pat <(sext_inreg (i32 IntRegs:$src1), i8), + (SXTB IntRegs:$src1)>; + +def : Pat <(sext_inreg (i32 IntRegs:$src1), i16), + (SXTH IntRegs:$src1)>; + +//===----------------------------------------------------------------------===// +// ALU32/PERM - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU32/PRED + +//===----------------------------------------------------------------------===// + +// Compare. +defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel; +defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel; +defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", "CMPEQ", seteq>, ImmRegRel; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM1Imm(imm); +}]>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + uint32_t imm = N->getZExtValue(); + return XformUToUM1Imm(imm); +}]>; + +def CTLZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = cl0($src1)", + [(set (i32 IntRegs:$dst), (ctlz (i32 IntRegs:$src1)))]>; + +def CTTZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = ct0($src1)", + [(set (i32 IntRegs:$dst), (cttz (i32 IntRegs:$src1)))]>; + +def CTLZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), + "$dst = cl0($src1)", + [(set (i32 IntRegs:$dst), (i32 (trunc (ctlz (i64 DoubleRegs:$src1)))))]>; + +def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), + "$dst = ct0($src1)", + [(set (i32 IntRegs:$dst), (i32 (trunc (cttz (i64 DoubleRegs:$src1)))))]>; + +def TSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = tstbit($src1, $src2)", + [(set (i1 PredRegs:$dst), + (setne (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>; + +def TSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + "$dst = tstbit($src1, $src2)", + [(set (i1 PredRegs:$dst), + (setne (and (shl 1, (u5ImmPred:$src2)), (i32 IntRegs:$src1)), 0))]>; + +//===----------------------------------------------------------------------===// +// ALU32/PRED - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU64/ALU + +//===----------------------------------------------------------------------===// +// Add. +def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = add($src1, $src2)", + [(set (i64 DoubleRegs:$dst), (add (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2)))]>; + +// Add halfword. + +// Compare. +defm CMPEHexagon4 : CMP64_rr<"cmp.eq", seteq>; +defm CMPGT64 : CMP64_rr<"cmp.gt", setgt>; +defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>; + +// Logical operations. 
+def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = and($src1, $src2)", + [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2)))]>; + +def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = or($src1, $src2)", + [(set (i64 DoubleRegs:$dst), (or (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2)))]>; + +def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = xor($src1, $src2)", + [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2)))]>; + +// Maximum. +def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = max($src2, $src1)", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 (setlt (i32 IntRegs:$src2), + (i32 IntRegs:$src1))), + (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>; + +def MAXUw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = maxu($src2, $src1)", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 (setult (i32 IntRegs:$src2), + (i32 IntRegs:$src1))), + (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>; + +def MAXd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = max($src2, $src1)", + [(set (i64 DoubleRegs:$dst), + (i64 (select (i1 (setlt (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1))), + (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2))))]>; + +def MAXUd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = maxu($src2, $src1)", + [(set (i64 DoubleRegs:$dst), + (i64 (select (i1 (setult (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1))), + (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2))))]>; + +// Minimum. +def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = min($src2, $src1)", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 (setgt (i32 IntRegs:$src2), + (i32 IntRegs:$src1))), + (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>; + +def MINUw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = minu($src2, $src1)", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 (setugt (i32 IntRegs:$src2), + (i32 IntRegs:$src1))), + (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>; + +def MINd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = min($src2, $src1)", + [(set (i64 DoubleRegs:$dst), + (i64 (select (i1 (setgt (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1))), + (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2))))]>; + +def MINUd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = minu($src2, $src1)", + [(set (i64 DoubleRegs:$dst), + (i64 (select (i1 (setugt (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1))), + (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2))))]>; + +// Subtract. +def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = sub($src1, $src2)", + [(set (i64 DoubleRegs:$dst), (sub (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2)))]>; + +// Subtract halfword. 
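Each MAX*/MIN* definition above matches a compare feeding a select rather than a dedicated DAG node, with the operand order fixed by the "$dst = max($src2, $src1)" assembly string; the signed and unsigned variants differ only in the condition code (setlt vs. setult). A standalone C++ sketch (illustrative only, helper names are ours) of the shape MAXw_rr and MAXUw_rr encode:

#include <cassert>
#include <cstdint>

// select(setlt(b, a), a, b) -- the pattern matched by MAXw_rr.
static int32_t max_w(int32_t a, int32_t b) { return (b < a) ? a : b; }

// Same shape with an unsigned compare (setult) -- matched by MAXUw_rr.
static uint32_t maxu_w(uint32_t a, uint32_t b) { return (b < a) ? a : b; }

int main() {
  assert(max_w(-3, 7) == 7);
  // Once the sign bit is set, the signed and unsigned compares diverge:
  assert(maxu_w(0xFFFFFFFFu, 7u) == 0xFFFFFFFFu); // 0xFFFFFFFF is -1 if signed
  return 0;
}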
+ +//===----------------------------------------------------------------------===// +// ALU64/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/BIT + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/BIT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/PERM + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/PERM - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// CR + +//===----------------------------------------------------------------------===// +// Logical reductions on predicates. + +// Looping instructions. + +// Pipelined looping instructions. + +// Logical operations on predicates. +def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), + "$dst = and($src1, $src2)", + [(set (i1 PredRegs:$dst), (and (i1 PredRegs:$src1), + (i1 PredRegs:$src2)))]>; + +let neverHasSideEffects = 1 in +def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, + PredRegs:$src2), + "$dst = and($src1, !$src2)", + []>; + +def ANY_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), + "$dst = any8($src1)", + []>; + +def ALL_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), + "$dst = all8($src1)", + []>; + +def VITPACK_pp : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1, + PredRegs:$src2), + "$dst = vitpack($src1, $src2)", + []>; + +def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2, + PredRegs:$src3), + "$dst = valignb($src1, $src2, $src3)", + []>; + +def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2, + PredRegs:$src3), + "$dst = vspliceb($src1, $src2, $src3)", + []>; + +def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1), + "$dst = mask($src1)", + []>; + +def NOT_p : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), + "$dst = not($src1)", + [(set (i1 PredRegs:$dst), (not (i1 PredRegs:$src1)))]>; + +def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), + "$dst = or($src1, $src2)", + [(set (i1 PredRegs:$dst), (or (i1 PredRegs:$src1), + (i1 PredRegs:$src2)))]>; + +def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), + "$dst = xor($src1, $src2)", + [(set (i1 PredRegs:$dst), (xor (i1 PredRegs:$src1), + (i1 PredRegs:$src2)))]>; + + +// User control register transfer. 
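The predicate-register operations above (any8, all8, mask, vitpack, valignb, vspliceb) carry no selection patterns, so their semantics are not visible from this file. As a reading aid, here is a standalone C++ sketch of any8, all8 and mask, assuming the Hexagon architecture manual's model of predicate registers as 8 bits wide, one bit per byte lane; treat the exact semantics as an assumption, not as backend code:

#include <cassert>
#include <cstdint>

// Pd = any8(Ps): all ones if any predicate bit is set (assumed semantics).
static uint8_t any8(uint8_t p) { return p != 0 ? 0xFF : 0x00; }

// Pd = all8(Ps): all ones only if every predicate bit is set (assumed).
static uint8_t all8(uint8_t p) { return p == 0xFF ? 0xFF : 0x00; }

// Rdd = mask(Pt): expand each predicate bit to a full byte lane (assumed).
static uint64_t mask(uint8_t p) {
  uint64_t r = 0;
  for (int i = 0; i < 8; ++i)
    if (p & (1u << i))
      r |= 0xFFull << (8 * i);
  return r;
}

int main() {
  assert(any8(0x01) == 0xFF && any8(0x00) == 0x00);
  assert(all8(0xFF) == 0xFF && all8(0xFE) == 0x00);
  assert(mask(0x05) == 0x0000000000FF00FFull); // bits 0 and 2 -> bytes 0 and 2
  return 0;
}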
+//===----------------------------------------------------------------------===// +// CR - +//===----------------------------------------------------------------------===// + +def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, + [SDNPHasChain]>; + +def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; + +let InputType = "imm", isBarrier = 1, isPredicable = 1, +Defs = [PC], isExtendable = 1, opExtendable = 0, isExtentSigned = 1, +opExtentBits = 24 in +class T_JMP <dag InsDag, list<dag> JumpList = []> + : JInst<(outs), InsDag, + "jump $dst" , JumpList> { + bits<24> dst; + + let IClass = 0b0101; + + let Inst{27-25} = 0b100; + let Inst{24-16} = dst{23-15}; + let Inst{13-1} = dst{14-2}; +} + +let InputType = "imm", isExtendable = 1, opExtendable = 1, isExtentSigned = 1, +Defs = [PC], isPredicated = 1, opExtentBits = 17 in +class T_JMP_c <bit PredNot, bit isPredNew, bit isTaken>: + JInst<(outs ), (ins PredRegs:$src, brtarget:$dst), + !if(PredNot, "if (!$src", "if ($src")# + !if(isPredNew, ".new) ", ") ")#"jump"# + !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> { + + let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), ""); + let isPredicatedFalse = PredNot; + let isPredicatedNew = isPredNew; + bits<2> src; + bits<17> dst; + + let IClass = 0b0101; + + let Inst{27-24} = 0b1100; + let Inst{21} = PredNot; + let Inst{12} = !if(isPredNew, isTaken, zero); + let Inst{11} = isPredNew; + let Inst{9-8} = src; + let Inst{23-22} = dst{16-15}; + let Inst{20-16} = dst{14-10}; + let Inst{13} = dst{9}; + let Inst{7-1} = dst{8-2}; + } + +let isBarrier = 1, Defs = [PC], isPredicable = 1, InputType = "reg" in +class T_JMPr<dag InsDag = (ins IntRegs:$dst)> + : JRInst<(outs ), InsDag, + "jumpr $dst" , + []> { + bits<5> dst; + + let IClass = 0b0101; + let Inst{27-21} = 0b0010100; + let Inst{20-16} = dst; +} + +let Defs = [PC], isPredicated = 1, InputType = "reg" in +class T_JMPr_c <bit PredNot, bit isPredNew, bit isTaken>: + JRInst <(outs ), (ins PredRegs:$src, IntRegs:$dst), + !if(PredNot, "if (!$src", "if ($src")# + !if(isPredNew, ".new) ", ") ")#"jumpr"# + !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> { + + let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), ""); + let isPredicatedFalse = PredNot; + let isPredicatedNew = isPredNew; + bits<2> src; + bits<5> dst; + + let IClass = 0b0101; + + let Inst{27-22} = 0b001101; + let Inst{21} = PredNot; + let Inst{20-16} = dst; + let Inst{12} = !if(isPredNew, isTaken, zero); + let Inst{11} = isPredNew; + let Inst{9-8} = src; + let Predicates = !if(isPredNew, [HasV3T], [HasV2T]); + let validSubTargets = !if(isPredNew, HasV3SubT, HasV2SubT); +} + +multiclass JMP_Pred<bit PredNot> { + def _#NAME : T_JMP_c<PredNot, 0, 0>; + // Predicate new + def _#NAME#new_t : T_JMP_c<PredNot, 1, 1>; // taken + def _#NAME#new_nt : T_JMP_c<PredNot, 1, 0>; // not taken +} + +multiclass JMP_base<string BaseOp> { + let BaseOpcode = BaseOp in { + def NAME : T_JMP<(ins brtarget:$dst), [(br bb:$dst)]>; + defm t : JMP_Pred<0>; + defm f : JMP_Pred<1>; + } +} + +multiclass JMPR_Pred<bit PredNot> { + def NAME: T_JMPr_c<PredNot, 0, 0>; + // Predicate new + def NAME#new_tV3 : T_JMPr_c<PredNot, 1, 1>; // taken + def NAME#new_ntV3 : T_JMPr_c<PredNot, 1, 0>; // not taken +} + +multiclass JMPR_base<string BaseOp> { + let BaseOpcode = BaseOp in { + def NAME : T_JMPr; + defm _t : 
JMPR_Pred<0>; + defm _f : JMPR_Pred<1>; + } +} + +let isTerminator = 1, neverHasSideEffects = 1 in { +let isBranch = 1 in +defm JMP : JMP_base<"JMP">, PredNewRel; + +let isBranch = 1, isIndirectBranch = 1 in +defm JMPR : JMPR_base<"JMPr">, PredNewRel; + +let isReturn = 1, isCodeGenOnly = 1 in +defm JMPret : JMPR_base<"JMPret">, PredNewRel; +} + +def : Pat<(retflag), + (JMPret (i32 R31))>; + +def : Pat <(brcond (i1 PredRegs:$src1), bb:$offset), + (JMP_t (i1 PredRegs:$src1), bb:$offset)>; + +// A return through builtin_eh_return. +let isReturn = 1, isTerminator = 1, isBarrier = 1, neverHasSideEffects = 1, +isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in +def EH_RETURN_JMPR : T_JMPr; + +def : Pat<(eh_return), + (EH_RETURN_JMPR (i32 R31))>; + +def : Pat<(HexagonBR_JT (i32 IntRegs:$dst)), + (JMPR (i32 IntRegs:$dst))>; + +def : Pat<(brind (i32 IntRegs:$dst)), + (JMPR (i32 IntRegs:$dst))>; + +//===----------------------------------------------------------------------===// +// JR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LD + +//===----------------------------------------------------------------------===// +/// +// Load -- MEMri operand +multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC, + bit isNot, bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : LDInst2<(outs RC:$dst), + (ins PredRegs:$src1, MEMri:$addr), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"($addr)", + []>; +} + +multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>; + // Predicate new + defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>; + } +} + +let isExtendable = 1, neverHasSideEffects = 1 in +multiclass LD_MEMri<string mnemonic, string CextOp, RegisterClass RC, + bits<5> ImmBits, bits<5> PredImmBits> { + + let CextOpcode = CextOp, BaseOpcode = CextOp in { + let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits, + isPredicable = 1 in + def NAME : LDInst2<(outs RC:$dst), (ins MEMri:$addr), + "$dst = "#mnemonic#"($addr)", + []>; + + let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits, + isPredicated = 1 in { + defm Pt : LD_MEMri_Pred<mnemonic, RC, 0 >; + defm NotPt : LD_MEMri_Pred<mnemonic, RC, 1 >; + } + } +} + +let addrMode = BaseImmOffset, isMEMri = "true" in { + let accessSize = ByteAccess in { + defm LDrib: LD_MEMri < "memb", "LDrib", IntRegs, 11, 6>, AddrModeRel; + defm LDriub: LD_MEMri < "memub" , "LDriub", IntRegs, 11, 6>, AddrModeRel; + } + + let accessSize = HalfWordAccess in { + defm LDrih: LD_MEMri < "memh", "LDrih", IntRegs, 12, 7>, AddrModeRel; + defm LDriuh: LD_MEMri < "memuh", "LDriuh", IntRegs, 12, 7>, AddrModeRel; + } + + let accessSize = WordAccess in + defm LDriw: LD_MEMri < "memw", "LDriw", IntRegs, 13, 8>, AddrModeRel; + + let accessSize = DoubleWordAccess in + defm LDrid: LD_MEMri < "memd", "LDrid", DoubleRegs, 14, 9>, AddrModeRel; +} + +def : Pat < (i32 (sextloadi8 ADDRriS11_0:$addr)), + (LDrib ADDRriS11_0:$addr) >; + +def : Pat < (i32 (zextloadi8 ADDRriS11_0:$addr)), + (LDriub ADDRriS11_0:$addr) >; + +def : Pat < (i32 (sextloadi16 ADDRriS11_1:$addr)), + (LDrih ADDRriS11_1:$addr) >; + +def : Pat < (i32 (zextloadi16 ADDRriS11_1:$addr)), + (LDriuh ADDRriS11_1:$addr) >; + +def : Pat < (i32 (load ADDRriS11_2:$addr)), + (LDriw ADDRriS11_2:$addr) 
>; + +def : Pat < (i64 (load ADDRriS11_3:$addr)), + (LDrid ADDRriS11_3:$addr) >; + + +// Load - Base with Immediate offset addressing mode +multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, + bit isNot, bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : LDInst2<(outs RC:$dst), + (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"($src2+#$src3)", + []>; +} + +multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp, + bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>; + // Predicate new + defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>; + } +} + +let isExtendable = 1, neverHasSideEffects = 1 in +multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, Operand predImmOp, bits<5> ImmBits, + bits<5> PredImmBits> { + + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { + let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits, + isPredicable = 1, AddedComplexity = 20 in + def NAME : LDInst2<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset), + "$dst = "#mnemonic#"($src1+#$offset)", + []>; + + let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits, + isPredicated = 1 in { + defm Pt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 0 >; + defm NotPt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 1 >; + } + } +} + +let addrMode = BaseImmOffset in { + let accessSize = ByteAccess in { + defm LDrib_indexed: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext, + 11, 6>, AddrModeRel; + defm LDriub_indexed: LD_Idxd <"memub" , "LDriub", IntRegs, s11_0Ext, u6_0Ext, + 11, 6>, AddrModeRel; + } + let accessSize = HalfWordAccess in { + defm LDrih_indexed: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext, + 12, 7>, AddrModeRel; + defm LDriuh_indexed: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext, + 12, 7>, AddrModeRel; + } + let accessSize = WordAccess in + defm LDriw_indexed: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext, + 13, 8>, AddrModeRel; + + let accessSize = DoubleWordAccess in + defm LDrid_indexed: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext, + 14, 9>, AddrModeRel; +} + +let AddedComplexity = 20 in { +def : Pat < (i32 (sextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))), + (LDrib_indexed IntRegs:$src1, s11_0ExtPred:$offset) >; + +def : Pat < (i32 (zextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))), + (LDriub_indexed IntRegs:$src1, s11_0ExtPred:$offset) >; + +def : Pat < (i32 (sextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))), + (LDrih_indexed IntRegs:$src1, s11_1ExtPred:$offset) >; + +def : Pat < (i32 (zextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))), + (LDriuh_indexed IntRegs:$src1, s11_1ExtPred:$offset) >; + +def : Pat < (i32 (load (add IntRegs:$src1, s11_2ExtPred:$offset))), + (LDriw_indexed IntRegs:$src1, s11_2ExtPred:$offset) >; + +def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))), + (LDrid_indexed IntRegs:$src1, s11_3ExtPred:$offset) >; +} + +//===----------------------------------------------------------------------===// +// Post increment load +//===----------------------------------------------------------------------===// + +multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, + bit isNot, bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2), + (ins 
PredRegs:$src1, IntRegs:$src2, ImmOp:$offset), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"($src2++#$offset)", + [], + "$src2 = $dst2">; +} + +multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC, + Operand ImmOp, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>; + // Predicate new + let Predicates = [HasV4T], validSubTargets = HasV4SubT in + defm _cdn#NAME#_V4 : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>; + } +} + +multiclass LD_PostInc<string mnemonic, string BaseOp, RegisterClass RC, + Operand ImmOp> { + + let BaseOpcode = "POST_"#BaseOp in { + let isPredicable = 1 in + def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, ImmOp:$offset), + "$dst = "#mnemonic#"($src1++#$offset)", + [], + "$src1 = $dst2">; + + let isPredicated = 1 in { + defm Pt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 0 >; + defm NotPt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 1 >; + } + } +} + +let hasCtrlDep = 1, neverHasSideEffects = 1, addrMode = PostInc in { + defm POST_LDrib : LD_PostInc<"memb", "LDrib", IntRegs, s4_0Imm>, + PredNewRel; + defm POST_LDriub : LD_PostInc<"memub", "LDriub", IntRegs, s4_0Imm>, + PredNewRel; + defm POST_LDrih : LD_PostInc<"memh", "LDrih", IntRegs, s4_1Imm>, + PredNewRel; + defm POST_LDriuh : LD_PostInc<"memuh", "LDriuh", IntRegs, s4_1Imm>, + PredNewRel; + defm POST_LDriw : LD_PostInc<"memw", "LDriw", IntRegs, s4_2Imm>, + PredNewRel; + defm POST_LDrid : LD_PostInc<"memd", "LDrid", DoubleRegs, s4_3Imm>, + PredNewRel; +} + +def : Pat< (i32 (extloadi1 ADDRriS11_0:$addr)), + (i32 (LDrib ADDRriS11_0:$addr)) >; + +// Load byte any-extend. +def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)), + (i32 (LDrib ADDRriS11_0:$addr)) >; + +// Indexed load byte any-extend. +let AddedComplexity = 20 in +def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))), + (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >; + +def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)), + (i32 (LDrih ADDRriS11_1:$addr))>; + +let AddedComplexity = 20 in +def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))), + (i32 (LDrih_indexed IntRegs:$src1, s11_1ImmPred:$offset)) >; + +let AddedComplexity = 10 in +def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)), + (i32 (LDriub ADDRriS11_0:$addr))>; + +let AddedComplexity = 20 in +def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))), + (i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>; + +// Load predicate. +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, +isPseudo = 1, Defs = [R10,R11,D5], neverHasSideEffects = 1 in +def LDriw_pred : LDInst2<(outs PredRegs:$dst), + (ins MEMri:$addr), + "Error; should not emit", + []>; + +// Deallocate stack frame. +let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in { + def DEALLOCFRAME : LDInst2<(outs), (ins), + "deallocframe", + []>; +} + +// Load and unpack bytes to halfwords. 
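Each post-increment load above produces two results: the loaded value and the updated base register, which the "$src1 = $dst2" constraint ties back to the input base so the increment happens in place. A standalone C++ sketch (illustrative only, function name is ours) of the memb(Rx++#s4:0) behaviour being modelled:

#include <cassert>
#include <cstdint>

// Load through the current base, then write base + offset back to the
// base register -- the two outs of the POST_LDrib definition.
static int32_t load_byte_post_inc(const int8_t *&base, int offset) {
  int32_t val = *base; // the access uses the pre-increment address
  base += offset;      // write-back, as in Rx++#s4:0
  return val;
}

int main() {
  int8_t buf[4] = {10, 20, 30, 40};
  const int8_t *p = buf;
  assert(load_byte_post_inc(p, 1) == 10 && p == buf + 1);
  assert(load_byte_post_inc(p, 2) == 20 && p == buf + 3);
  return 0;
}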
+//===----------------------------------------------------------------------===// +// LD - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/ALU + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/COMPLEX + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/COMPLEX - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/MPYH + +//===----------------------------------------------------------------------===// +// Multiply and use lower result. +// Rd=+mpyi(Rs,#u8) +let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in +def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Ext:$src2), + "$dst =+ mpyi($src1, #$src2)", + [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), + u8ExtPred:$src2))]>; + +// Rd=-mpyi(Rs,#u8) +def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), + "$dst =- mpyi($src1, #$src2)", + [(set (i32 IntRegs:$dst), (ineg (mul (i32 IntRegs:$src1), + u8ImmPred:$src2)))]>; + +// Rd=mpyi(Rs,#m9) +// s9 is NOT the same as m9, but it works so far. +// The assembler maps this to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8), +// depending on the value of m9. See the Arch Spec.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9, +CextOpcode = "MPYI", InputType = "imm" in +def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2), + "$dst = mpyi($src1, #$src2)", + [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), + s9ExtPred:$src2))]>, ImmRegRel; + +// Rd=mpyi(Rs,Rt) +let CextOpcode = "MPYI", InputType = "reg" in +def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpyi($src1, $src2)", + [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>, ImmRegRel; + +// Rx+=mpyi(Rs,#u8) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8, +CextOpcode = "MPYI_acc", InputType = "imm" in +def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3), + "$dst += mpyi($src2, #$src3)", + [(set (i32 IntRegs:$dst), + (add (mul (i32 IntRegs:$src2), u8ExtPred:$src3), + (i32 IntRegs:$src1)))], + "$src1 = $dst">, ImmRegRel; + +// Rx+=mpyi(Rs,Rt) +let CextOpcode = "MPYI_acc", InputType = "reg" in +def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "$dst += mpyi($src2, $src3)", + [(set (i32 IntRegs:$dst), + (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), + (i32 IntRegs:$src1)))], + "$src1 = $dst">, ImmRegRel; + +// Rx-=mpyi(Rs,#u8) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8 in +def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3), + "$dst -= mpyi($src2, #$src3)", + [(set (i32 IntRegs:$dst), + (sub (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), + u8ExtPred:$src3)))], + "$src1 = $dst">; + +// Multiply and use upper result. +// Rd=mpy(Rs,Rt.H):<<1:rnd:sat +// Rd=mpy(Rs,Rt.L):<<1:rnd:sat +// Rd=mpy(Rs,Rt) +def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpy($src1, $src2)", + [(set (i32 IntRegs:$dst), (mulhs (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + +// Rd=mpy(Rs,Rt):rnd +// Rd=mpyu(Rs,Rt) +def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpyu($src1, $src2)", + [(set (i32 IntRegs:$dst), (mulhu (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + +// Multiply and use full result. +// Rdd=mpyu(Rs,Rt) +def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpyu($src1, $src2)", + [(set (i64 DoubleRegs:$dst), + (mul (i64 (anyext (i32 IntRegs:$src1))), + (i64 (anyext (i32 IntRegs:$src2)))))]>; + +// Rdd=mpy(Rs,Rt) +def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpy($src1, $src2)", + [(set (i64 DoubleRegs:$dst), + (mul (i64 (sext (i32 IntRegs:$src1))), + (i64 (sext (i32 IntRegs:$src2)))))]>; + +// Multiply and accumulate, use full result. 
+// Rxx[+-]=mpy(Rs,Rt) +// Rxx+=mpy(Rs,Rt) +def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "$dst += mpy($src2, $src3)", + [(set (i64 DoubleRegs:$dst), + (add (mul (i64 (sext (i32 IntRegs:$src2))), + (i64 (sext (i32 IntRegs:$src3)))), + (i64 DoubleRegs:$src1)))], + "$src1 = $dst">; + +// Rxx-=mpy(Rs,Rt) +def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "$dst -= mpy($src2, $src3)", + [(set (i64 DoubleRegs:$dst), + (sub (i64 DoubleRegs:$src1), + (mul (i64 (sext (i32 IntRegs:$src2))), + (i64 (sext (i32 IntRegs:$src3))))))], + "$src1 = $dst">; + +// Rxx[+-]=mpyu(Rs,Rt) +// Rxx+=mpyu(Rs,Rt) +def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + IntRegs:$src2, IntRegs:$src3), + "$dst += mpyu($src2, $src3)", + [(set (i64 DoubleRegs:$dst), + (add (mul (i64 (anyext (i32 IntRegs:$src2))), + (i64 (anyext (i32 IntRegs:$src3)))), + (i64 DoubleRegs:$src1)))], "$src1 = $dst">; + +// Rxx-=mpyu(Rs,Rt) +def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "$dst -= mpyu($src2, $src3)", + [(set (i64 DoubleRegs:$dst), + (sub (i64 DoubleRegs:$src1), + (mul (i64 (anyext (i32 IntRegs:$src2))), + (i64 (anyext (i32 IntRegs:$src3))))))], + "$src1 = $dst">; + + +let InputType = "reg", CextOpcode = "ADD_acc" in +def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, + IntRegs:$src2, IntRegs:$src3), + "$dst += add($src2, $src3)", + [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2), + (i32 IntRegs:$src3)), + (i32 IntRegs:$src1)))], + "$src1 = $dst">, ImmRegRel; + +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, +InputType = "imm", CextOpcode = "ADD_acc" in +def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, + IntRegs:$src2, s8Ext:$src3), + "$dst += add($src2, #$src3)", + [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2), + s8_16ExtPred:$src3), + (i32 IntRegs:$src1)))], + "$src1 = $dst">, ImmRegRel; + +let CextOpcode = "SUB_acc", InputType = "reg" in +def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, + IntRegs:$src2, IntRegs:$src3), + "$dst -= add($src2, $src3)", + [(set (i32 IntRegs:$dst), + (sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], + "$src1 = $dst">, ImmRegRel; + +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, +CextOpcode = "SUB_acc", InputType = "imm" in +def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, + IntRegs:$src2, s8Ext:$src3), + "$dst -= add($src2, #$src3)", + [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1), + (add (i32 IntRegs:$src2), + s8_16ExtPred:$src3)))], + "$src1 = $dst">, ImmRegRel; + +//===----------------------------------------------------------------------===// +// MTYPE/MPYH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/MPYS + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/MPYS - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/VB + +//===----------------------------------------------------------------------===// 
+//===----------------------------------------------------------------------===// +// MTYPE/VB - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/VH + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/VH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ST + +//===----------------------------------------------------------------------===// +/// +// Store doubleword. + +//===----------------------------------------------------------------------===// +// Post increment store +//===----------------------------------------------------------------------===// + +multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, + bit isNot, bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : STInst2PI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2++#$offset) = $src3", + [], + "$src2 = $dst">; +} + +multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC, + Operand ImmOp, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>; + // Predicate new + let Predicates = [HasV4T], validSubTargets = HasV4SubT in + defm _cdn#NAME#_V4 : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>; + } +} + +let hasCtrlDep = 1, isNVStorable = 1, neverHasSideEffects = 1 in +multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC, + Operand ImmOp> { + + let hasCtrlDep = 1, BaseOpcode = "POST_"#BaseOp in { + let isPredicable = 1 in + def NAME : STInst2PI<(outs IntRegs:$dst), + (ins IntRegs:$src1, ImmOp:$offset, RC:$src2), + mnemonic#"($src1++#$offset) = $src2", + [], + "$src1 = $dst">; + + let isPredicated = 1 in { + defm Pt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 0 >; + defm NotPt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 1 >; + } + } +} + +defm POST_STbri: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel; +defm POST_SThri: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel; +defm POST_STwri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; + +let isNVStorable = 0 in +defm POST_STdri: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm>, AddrModeRel; + +def : Pat<(post_truncsti8 (i32 IntRegs:$src1), IntRegs:$src2, + s4_0ImmPred:$offset), + (POST_STbri IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>; + +def : Pat<(post_truncsti16 (i32 IntRegs:$src1), IntRegs:$src2, + s4_1ImmPred:$offset), + (POST_SThri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>; + +def : Pat<(post_store (i32 IntRegs:$src1), IntRegs:$src2, s4_2ImmPred:$offset), + (POST_STwri IntRegs:$src2, s4_2ImmPred:$offset, IntRegs:$src1)>; + +def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2, + s4_3ImmPred:$offset), + (POST_STdri IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>; + +//===----------------------------------------------------------------------===// +// multiclass for the store instructions with MEMri operand.
+//===----------------------------------------------------------------------===// +multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : STInst2<(outs), + (ins PredRegs:$src1, MEMri:$addr, RC: $src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($addr) = $src2", + []>; +} + +multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>; + + // Predicate new + let validSubTargets = HasV4SubT, Predicates = [HasV4T] in + defm _cdn#NAME#_V4 : ST_MEMri_Pbase<mnemonic, RC, PredNot, 1>; + } +} + +let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in +multiclass ST_MEMri<string mnemonic, string CextOp, RegisterClass RC, + bits<5> ImmBits, bits<5> PredImmBits> { + + let CextOpcode = CextOp, BaseOpcode = CextOp in { + let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits, + isPredicable = 1 in + def NAME : STInst2<(outs), + (ins MEMri:$addr, RC:$src), + mnemonic#"($addr) = $src", + []>; + + let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits, + isPredicated = 1 in { + defm Pt : ST_MEMri_Pred<mnemonic, RC, 0>; + defm NotPt : ST_MEMri_Pred<mnemonic, RC, 1>; + } + } +} + +let addrMode = BaseImmOffset, isMEMri = "true" in { + let accessSize = ByteAccess in + defm STrib: ST_MEMri < "memb", "STrib", IntRegs, 11, 6>, AddrModeRel; + + let accessSize = HalfWordAccess in + defm STrih: ST_MEMri < "memh", "STrih", IntRegs, 12, 7>, AddrModeRel; + + let accessSize = WordAccess in + defm STriw: ST_MEMri < "memw", "STriw", IntRegs, 13, 8>, AddrModeRel; + + let accessSize = DoubleWordAccess, isNVStorable = 0 in + defm STrid: ST_MEMri < "memd", "STrid", DoubleRegs, 14, 9>, AddrModeRel; +} + +def : Pat<(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr), + (STrib ADDRriS11_0:$addr, (i32 IntRegs:$src1))>; + +def : Pat<(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr), + (STrih ADDRriS11_1:$addr, (i32 IntRegs:$src1))>; + +def : Pat<(store (i32 IntRegs:$src1), ADDRriS11_2:$addr), + (STriw ADDRriS11_2:$addr, (i32 IntRegs:$src1))>; + +def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr), + (STrid ADDRriS11_3:$addr, (i64 DoubleRegs:$src1))>; + + +//===----------------------------------------------------------------------===// +// multiclass for the store instructions with base+immediate offset +// addressing mode +//===----------------------------------------------------------------------===// +multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, + bit isNot, bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2+#$src3) = $src4", + []>; +} + +multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp, + bit PredNot> { + let isPredicatedFalse = PredNot, isPredicated = 1 in { + defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>; + + // Predicate new + let validSubTargets = HasV4SubT, Predicates = [HasV4T] in + defm _cdn#NAME#_V4 : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>; + } +} + +let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in +multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, Operand predImmOp, bits<5> ImmBits, + bits<5> PredImmBits> 
{ + + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { + let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits, + isPredicable = 1 in + def NAME : STInst2<(outs), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1+#$src2) = $src3", + []>; + + let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits in { + defm Pt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 0>; + defm NotPt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 1>; + } + } +} + +let addrMode = BaseImmOffset, InputType = "reg" in { + let accessSize = ByteAccess in + defm STrib_indexed: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext, + u6_0Ext, 11, 6>, AddrModeRel, ImmRegRel; + + let accessSize = HalfWordAccess in + defm STrih_indexed: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext, + u6_1Ext, 12, 7>, AddrModeRel, ImmRegRel; + + let accessSize = WordAccess in + defm STriw_indexed: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext, + u6_2Ext, 13, 8>, AddrModeRel, ImmRegRel; + + let accessSize = DoubleWordAccess, isNVStorable = 0 in + defm STrid_indexed: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext, + u6_3Ext, 14, 9>, AddrModeRel; +} + +let AddedComplexity = 10 in { +def : Pat<(truncstorei8 (i32 IntRegs:$src1), (add IntRegs:$src2, + s11_0ExtPred:$offset)), + (STrib_indexed IntRegs:$src2, s11_0ImmPred:$offset, + (i32 IntRegs:$src1))>; + +def : Pat<(truncstorei16 (i32 IntRegs:$src1), (add IntRegs:$src2, + s11_1ExtPred:$offset)), + (STrih_indexed IntRegs:$src2, s11_1ImmPred:$offset, + (i32 IntRegs:$src1))>; + +def : Pat<(store (i32 IntRegs:$src1), (add IntRegs:$src2, + s11_2ExtPred:$offset)), + (STriw_indexed IntRegs:$src2, s11_2ImmPred:$offset, + (i32 IntRegs:$src1))>; + +def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2, + s11_3ExtPred:$offset)), + (STrid_indexed IntRegs:$src2, s11_3ImmPred:$offset, + (i64 DoubleRegs:$src1))>; +} + +// memh(Rx++#s4:1)=Rt.H + +// Store word. +// Store predicate. +let Defs = [R10,R11,D5], neverHasSideEffects = 1 in +def STriw_pred : STInst2<(outs), + (ins MEMri:$addr, PredRegs:$src1), + "Error; should not emit", + []>; + +// Allocate stack frame. +let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in { + def ALLOCFRAME : STInst2<(outs), + (ins i32imm:$amt), + "allocframe(#$amt)", + []>; +} +//===----------------------------------------------------------------------===// +// ST - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/ALU + +//===----------------------------------------------------------------------===// +// Logical NOT. +def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + "$dst = not($src1)", + [(set (i64 DoubleRegs:$dst), (not (i64 DoubleRegs:$src1)))]>; + + +// Sign extend word to doubleword. +def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1), + "$dst = sxtw($src1)", + [(set (i64 DoubleRegs:$dst), (sext (i32 IntRegs:$src1)))]>; +//===----------------------------------------------------------------------===// +// STYPE/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/BIT + +//===----------------------------------------------------------------------===// +// clrbit. 
+def CLRBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+            "$dst = clrbit($src1, #$src2)",
+            [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
+                                           (not (shl 1, u5ImmPred:$src2))))]>;
+
+def CLRBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+            "$dst = clrbit($src1, #$src2)",
+            []>;
+
+// Map from r0 = and(r1, 2147483647) to r0 = clrbit(r1, #31).
+def : Pat <(and (i32 IntRegs:$src1), 2147483647),
+      (CLRBIT_31 (i32 IntRegs:$src1), 31)>;
+
+// setbit.
+def SETBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+            "$dst = setbit($src1, #$src2)",
+            [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
+                                          (shl 1, u5ImmPred:$src2)))]>;
+
+// Map from r0 = or(r1, -2147483648) to r0 = setbit(r1, #31).
+def SETBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+            "$dst = setbit($src1, #$src2)",
+            []>;
+
+def : Pat <(or (i32 IntRegs:$src1), -2147483648),
+      (SETBIT_31 (i32 IntRegs:$src1), 31)>;
+
+// togglebit.
+def TOGBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+            "$dst = togglebit($src1, #$src2)",
+            [(set (i32 IntRegs:$dst), (xor (i32 IntRegs:$src1),
+                                           (shl 1, u5ImmPred:$src2)))]>;
+
+// Map from r0 = xor(r1, -2147483648) to r0 = togglebit(r1, #31).
+def TOGBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+            "$dst = togglebit($src1, #$src2)",
+            []>;
+
+def : Pat <(xor (i32 IntRegs:$src1), -2147483648),
+      (TOGBIT_31 (i32 IntRegs:$src1), 31)>;
+//===----------------------------------------------------------------------===//
+// STYPE/BIT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PRED +
+//===----------------------------------------------------------------------===//
+// Predicate transfer.
+let neverHasSideEffects = 1 in
+def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1),
+               "$dst = $src1  /* Should almost never emit this. */",
+               []>;
+
+def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1),
+               "$dst = $src1  /* Should almost never emit this. */",
+               [(set (i1 PredRegs:$dst), (trunc (i32 IntRegs:$src1)))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+// Shift by immediate.
+def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+             "$dst = asr($src1, #$src2)",
+             [(set (i32 IntRegs:$dst), (sra (i32 IntRegs:$src1),
+                                            u5ImmPred:$src2))]>;
+
+def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+              "$dst = asr($src1, #$src2)",
+              [(set (i64 DoubleRegs:$dst), (sra (i64 DoubleRegs:$src1),
+                                                u6ImmPred:$src2))]>;
+
+def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+          "$dst = asl($src1, #$src2)",
+          [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1),
+                                         u5ImmPred:$src2))]>;
+
+def ASLd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+              "$dst = asl($src1, #$src2)",
+              [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1),
+                                                u6ImmPred:$src2))]>;
+
+def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+             "$dst = lsr($src1, #$src2)",
+             [(set (i32 IntRegs:$dst), (srl (i32 IntRegs:$src1),
+                                            u5ImmPred:$src2))]>;
+
+def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+              "$dst = lsr($src1, #$src2)",
+              [(set (i64 DoubleRegs:$dst), (srl (i64 DoubleRegs:$src1),
+                                                u6ImmPred:$src2))]>;
+
+// Shift by immediate and add.
+let AddedComplexity = 100 in +def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + u3Imm:$src3), + "$dst = addasl($src1, $src2, #$src3)", + [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u3ImmPred:$src3)))]>; + +// Shift by register. +def ASL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = asl($src1, $src2)", + [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + +def ASR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = asr($src1, $src2)", + [(set (i32 IntRegs:$dst), (sra (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + +def LSL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = lsl($src1, $src2)", + [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + +def LSR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = lsr($src1, $src2)", + [(set (i32 IntRegs:$dst), (srl (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + +def ASLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + "$dst = asl($src1, $src2)", + [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1), + (i32 IntRegs:$src2)))]>; + +def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + "$dst = lsl($src1, $src2)", + [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1), + (i32 IntRegs:$src2)))]>; + +def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + IntRegs:$src2), + "$dst = asr($src1, $src2)", + [(set (i64 DoubleRegs:$dst), (sra (i64 DoubleRegs:$src1), + (i32 IntRegs:$src2)))]>; + +def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + IntRegs:$src2), + "$dst = lsr($src1, $src2)", + [(set (i64 DoubleRegs:$dst), (srl (i64 DoubleRegs:$src1), + (i32 IntRegs:$src2)))]>; + +//===----------------------------------------------------------------------===// +// STYPE/SHIFT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/VH + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/VH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/VW + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/VW - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SYSTEM/SUPER + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SYSTEM/USER + +//===----------------------------------------------------------------------===// +def SDHexagonBARRIER: SDTypeProfile<0, 0, []>; +def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER, + [SDNPHasChain]>; + +let hasSideEffects = 1, isSolo = 1 in +def BARRIER : SYSInst<(outs), (ins), + "barrier", + [(HexagonBARRIER)]>; + +//===----------------------------------------------------------------------===// +// SYSTEM/SUPER - 
+//===----------------------------------------------------------------------===// + +// TFRI64 - assembly mapped. +let isReMaterializable = 1 in +def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1), + "$dst = #$src1", + [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>; + +// Pseudo instruction to encode a set of conditional transfers. +// This instruction is used instead of a mux and trades-off codesize +// for performance. We conduct this transformation optimistically in +// the hope that these instructions get promoted to dot-new transfers. +let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, + IntRegs:$src2, + IntRegs:$src3), + "Error; should not emit", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 PredRegs:$src1), + (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))]>; +let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3), + "Error; should not emit", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2), + s12ImmPred:$src3)))]>; + +let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3), + "Error; should not emit", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2, + (i32 IntRegs:$src3))))]>; + +let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3), + "Error; should not emit", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2, + s12ImmPred:$src3)))]>; + +// Generate frameindex addresses. +let isReMaterializable = 1 in +def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1), + "$dst = add($src1)", + [(set (i32 IntRegs:$dst), ADDRri:$src1)]>; + +// +// CR - Type. +// +let neverHasSideEffects = 1, Defs = [SA0, LC0] in { +def LOOP0_i : CRInst<(outs), (ins brtarget:$offset, u10Imm:$src2), + "loop0($offset, #$src2)", + []>; +} + +let neverHasSideEffects = 1, Defs = [SA0, LC0] in { +def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2), + "loop0($offset, $src2)", + []>; +} + +let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1, + Defs = [PC, LC0], Uses = [SA0, LC0] in { +def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset), + ":endloop0", + []>; +} + +// Support for generating global address. +// Taken from X86InstrInfo.td. 
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [ + SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisPtrTy<0>]>; +def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; +def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; + +// HI/LO Instructions +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def LO : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.l = #LO($global)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def HI : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.h = #HI($global)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value), + "$dst.l = #LO($imm_value)", + []>; + + +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value), + "$dst.h = #HI($imm_value)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt), + "$dst.l = #LO($jt)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt), + "$dst.h = #HI($jt)", + []>; + + +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def LO_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst.l = #LO($label)", + []>; + +let isReMaterializable = 1, isMoveImm = 1 , neverHasSideEffects = 1 in +def HI_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst.h = #HI($label)", + []>; + +// This pattern is incorrect. When we add small data, we should change +// this pattern to use memw(#foo). +// This is for sdata. +let isMoveImm = 1 in +def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst = CONST32(#$global)", + [(set (i32 IntRegs:$dst), + (load (HexagonCONST32 tglobaltlsaddr:$global)))]>; + +// This is for non-sdata. 
+let isReMaterializable = 1, isMoveImm = 1 in +def CONST32_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst = CONST32(#$global)", + [(set (i32 IntRegs:$dst), + (HexagonCONST32 tglobaladdr:$global))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32_set_jt : LDInst2<(outs IntRegs:$dst), (ins jumptablebase:$jt), + "$dst = CONST32(#$jt)", + [(set (i32 IntRegs:$dst), + (HexagonCONST32 tjumptable:$jt))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32GP_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst = CONST32(#$global)", + [(set (i32 IntRegs:$dst), + (HexagonCONST32_GP tglobaladdr:$global))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global), + "$dst = CONST32(#$global)", + [(set (i32 IntRegs:$dst), imm:$global) ]>; + +// Map BlockAddress lowering to CONST32_Int_Real +def : Pat<(HexagonCONST32_GP tblockaddress:$addr), + (CONST32_Int_Real tblockaddress:$addr)>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst = CONST32($label)", + [(set (i32 IntRegs:$dst), (HexagonCONST32 bbl:$label))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST64_Int_Real : LDInst2<(outs DoubleRegs:$dst), (ins i64imm:$global), + "$dst = CONST64(#$global)", + [(set (i64 DoubleRegs:$dst), imm:$global) ]>; + +def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), + "$dst = xor($dst, $dst)", + [(set (i1 PredRegs:$dst), 0)]>; + +def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpy($src1, $src2)", + [(set (i32 IntRegs:$dst), + (trunc (i64 (srl (i64 (mul (i64 (sext (i32 IntRegs:$src1))), + (i64 (sext (i32 IntRegs:$src2))))), + (i32 32)))))]>; + +// Pseudo instructions. +def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; + +def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; + +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; + +def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; + +def call : SDNode<"HexagonISD::CALL", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain, +// Optional Flag and Variable Arguments. +// Its 1 Operand has pointer type. +def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +let Defs = [R29, R30], Uses = [R31, R30, R29] in { + def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), + "Should never be emitted", + [(callseq_start timm:$amt)]>; +} + +let Defs = [R29, R30, R31], Uses = [R29] in { + def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + "Should never be emitted", + [(callseq_end timm:$amt1, timm:$amt2)]>; +} +// Call subroutine. +let isCall = 1, neverHasSideEffects = 1, + Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, + R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { + def CALL : JInst<(outs), (ins calltarget:$dst), + "call $dst", []>; +} + +// Call subroutine from register. 
+let isCall = 1, neverHasSideEffects = 1,
+  Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+          R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+  def CALLR : JRInst<(outs), (ins IntRegs:$dst),
+              "callr $dst",
+              []>;
+ }
+
+// Indirect tail-call.
+let isCodeGenOnly = 1, isCall = 1, isReturn = 1 in
+def TCRETURNR : T_JMPr;
+
+// Direct tail-calls.
+let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+isTerminator = 1, isCodeGenOnly = 1 in {
+  def TCRETURNtg : T_JMP<(ins calltarget:$dst)>;
+  def TCRETURNtext : T_JMP<(ins calltarget:$dst)>;
+}
+
+// Map call instruction.
+def : Pat<(call (i32 IntRegs:$dst)),
+      (CALLR (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>;
+def : Pat<(call tglobaladdr:$dst),
+      (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call texternalsym:$dst),
+      (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>;
+// Tail calls.
+def : Pat<(HexagonTCRet tglobaladdr:$dst),
+      (TCRETURNtg tglobaladdr:$dst)>;
+def : Pat<(HexagonTCRet texternalsym:$dst),
+      (TCRETURNtext texternalsym:$dst)>;
+def : Pat<(HexagonTCRet (i32 IntRegs:$dst)),
+      (TCRETURNR (i32 IntRegs:$dst))>;
+
+// Atomic load and store support.
+// 8-bit atomic load
+def : Pat<(atomic_load_8 ADDRriS11_0:$src1),
+      (i32 (LDriub ADDRriS11_0:$src1))>;
+
+def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)),
+      (i32 (LDriub_indexed (i32 IntRegs:$src1), s11_0ImmPred:$offset))>;
+
+// 16-bit atomic load
+def : Pat<(atomic_load_16 ADDRriS11_1:$src1),
+      (i32 (LDriuh ADDRriS11_1:$src1))>;
+
+def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)),
+      (i32 (LDriuh_indexed (i32 IntRegs:$src1), s11_1ImmPred:$offset))>;
+
+// 32-bit atomic load
+def : Pat<(atomic_load_32 ADDRriS11_2:$src1),
+      (i32 (LDriw ADDRriS11_2:$src1))>;
+
+def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)),
+      (i32 (LDriw_indexed (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
+
+// 64-bit atomic load
+def : Pat<(atomic_load_64 ADDRriS11_3:$src1),
+      (i64 (LDrid ADDRriS11_3:$src1))>;
+
+def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)),
+      (i64 (LDrid_indexed (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
+
+def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)),
+      (STrib ADDRriS11_0:$src2, (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset),
+                          (i32 IntRegs:$src1)),
+      (STrib_indexed (i32 IntRegs:$src2), s11_0ImmPred:$offset,
+                     (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)),
+      (STrih ADDRriS11_1:$src2, (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_16 (add (i32 IntRegs:$src2), s11_1ImmPred:$offset),
+                           (i32 IntRegs:$src1)),
+      (STrih_indexed (i32 IntRegs:$src2), s11_1ImmPred:$offset,
+                     (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)),
+      (STriw ADDRriS11_2:$src2, (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_32 (add (i32 IntRegs:$src2), s11_2ImmPred:$offset),
+                           (i32 IntRegs:$src1)),
+      (STriw_indexed (i32 IntRegs:$src2), s11_2ImmPred:$offset,
+                     (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_64 ADDRriS11_3:$src2, (i64 DoubleRegs:$src1)),
+      (STrid ADDRriS11_3:$src2, (i64 DoubleRegs:$src1))>;
+
+def : Pat<(atomic_store_64 (add (i32 IntRegs:$src2), s11_3ImmPred:$offset),
+                           (i64 DoubleRegs:$src1)),
+      (STrid_indexed (i32 IntRegs:$src2), s11_3ImmPred:$offset,
+                     (i64 DoubleRegs:$src1))>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1).
+def : Pat <(and (i32 IntRegs:$src1), 65535),
+      (ZXTH (i32 IntRegs:$src1))>;
+
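+// For illustration (a sketch, not actual compiler output): a C-level mask
+// such as
+//
+//   unsigned lo16(unsigned x) { return x & 0xFFFF; }
+//
+// reaches instruction selection as (and (i32 ...), 65535), so the pattern
+// above selects the single instruction
+//
+//   r0 = zxth(r0)
+//
+// instead of materializing the mask constant and issuing a separate and.
+// Register assignment here is hypothetical.
+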
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def : Pat <(and (i32 IntRegs:$src1), 255),
+      (ZXTB (i32 IntRegs:$src1))>;
+
+// Map Add(p1, true) to p1 = not(p1).
+// Add(p1, false) should never be produced; if it ever is, it must be
+// mapped to a NOOP.
+def : Pat <(add (i1 PredRegs:$src1), -1),
+      (NOT_p (i1 PredRegs:$src1))>;
+
+// Map from p0 = setlt(r0, r1); r2 = mux(p0, r3, r4) =>
+// p0 = cmp.lt(r0, r1); r2 = mux(p0, r3, r4).
+// cmp.lt(r0, r1) -> cmp.gt(r1, r0)
+def : Pat <(select (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+                   (i32 IntRegs:$src3),
+                   (i32 IntRegs:$src4)),
+      (i32 (TFR_condset_rr (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)),
+                           (i32 IntRegs:$src4), (i32 IntRegs:$src3)))>,
+      Requires<[HasV2TOnly]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def : Pat <(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ImmPred:$src3),
+      (i32 (TFR_condset_ii (i1 PredRegs:$src1), s8ImmPred:$src3,
+                           s8ImmPred:$src2))>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = TFR_condset_ri(p0, r1, #i).
+def : Pat <(select (not (i1 PredRegs:$src1)), s12ImmPred:$src2,
+                   (i32 IntRegs:$src3)),
+      (i32 (TFR_condset_ri (i1 PredRegs:$src1), (i32 IntRegs:$src3),
+                           s12ImmPred:$src2))>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = TFR_condset_ir(p0, #i, r1).
+def : Pat <(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s12ImmPred:$src3),
+      (i32 (TFR_condset_ir (i1 PredRegs:$src1), s12ImmPred:$src3,
+                           (i32 IntRegs:$src2)))>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def : Pat <(brcond (not (i1 PredRegs:$src1)), bb:$offset),
+      (JMP_f (i1 PredRegs:$src1), bb:$offset)>;
+
+// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
+def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))),
+      (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
+
+
+let AddedComplexity = 100 in
+def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))),
+      (i64 (COMBINE_rr (TFRI 0),
+                       (LDriub_indexed (CONST32_set tglobaladdr:$global), 0)))>,
+      Requires<[NoV4T]>;
+
+// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
+let AddedComplexity = 10 in
+def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
+      (i32 (AND_rr (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo).
+def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
+      (i64 (SXTW (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg))))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)).
+def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
+      (i64 (SXTW (i32 (SXTH (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+                                                 subreg_loreg))))))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)).
+def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
+      (i64 (SXTW (i32 (SXTB (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+                                                 subreg_loreg))))))>;
+
+// We want to prevent emitting pnot's as much as possible.
+// Map brcond with an unsupported setcc to a JMP_f.
+def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + bb:$offset), + (JMP_f (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + bb:$offset)>; + +def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)), + bb:$offset), + (JMP_f (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>; + +def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), + (JMP_f (i1 PredRegs:$src1), bb:$offset)>; + +def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), + (JMP_t (i1 PredRegs:$src1), bb:$offset)>; + +// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) +def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), + bb:$offset), + (JMP_f (CMPGTri (i32 IntRegs:$src1), + (DEC_CONST_SIGNED s8ImmPred:$src2)), bb:$offset)>; + +// cmp.lt(r0, r1) -> cmp.gt(r1, r0) +def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + bb:$offset), + (JMP_t (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>; + +def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + bb:$offset), + (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)), + bb:$offset)>; + +def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + bb:$offset), + (JMP_f (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + bb:$offset)>; + +def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + bb:$offset), + (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), + bb:$offset)>; + +// Map from a 64-bit select to an emulated 64-bit mux. +// Hexagon does not support 64-bit MUXes; so emulate with combines. +def : Pat <(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src3)), + (i64 (COMBINE_rr (i32 (MUX_rr (i1 PredRegs:$src1), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_hireg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3), + subreg_hireg)))), + (i32 (MUX_rr (i1 PredRegs:$src1), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3), + subreg_loreg))))))>; + +// Map from a 1-bit select to logical ops. +// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3). +def : Pat <(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), + (i1 PredRegs:$src3)), + (OR_pp (AND_pp (i1 PredRegs:$src1), (i1 PredRegs:$src2)), + (AND_pp (NOT_p (i1 PredRegs:$src1)), (i1 PredRegs:$src3)))>; + +// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs. +def : Pat<(i1 (load ADDRriS11_2:$addr)), + (i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>; + +// Map for truncating from 64 immediates to 32 bit immediates. +def : Pat<(i32 (trunc (i64 DoubleRegs:$src))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg))>; + +// Map for truncating from i64 immediates to i1 bit immediates. +def : Pat<(i1 (trunc (i64 DoubleRegs:$src))), + (i1 (TFR_PdRs (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), + subreg_loreg))))>; + +// Map memb(Rs) = Rdd -> memb(Rs) = Rt. +def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), + (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), + subreg_loreg)))>; + +// Map memh(Rs) = Rdd -> memh(Rs) = Rt. 
+def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
+      (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+                                                     subreg_loreg)))>;
+
+// Map memw(Rs) = Rdd -> memw(Rs) = Rt.
+def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
+      (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+                                                     subreg_loreg)))>;
+
+// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0.
+def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
+      (STrib ADDRriS11_2:$addr, (TFRI 1))>;
+
+// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt.
+def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr),
+      (STrib ADDRriS11_2:$addr, (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0)))>;
+
+// Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs).
+// Hexagon_TODO: We can probably use combine but that will cost 2 instructions.
+// Better way to do this?
+def : Pat<(i64 (anyext (i32 IntRegs:$src1))),
+      (i64 (SXTW (i32 IntRegs:$src1)))>;
+
+// Map cmple -> cmpgt.
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)),
+      (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ExtPred:$src2)))>;
+
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+      (i1 (NOT_p (CMPGTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
+
+// Rss <= Rtt -> !(Rss > Rtt).
+def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+      (i1 (NOT_p (CMPGT64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
+
+// Map cmpne -> cmpeq.
+// Hexagon_TODO: We should improve on this.
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)),
+      (i1 (NOT_p (i1 (CMPEQri (i32 IntRegs:$src1), s10ExtPred:$src2))))>;
+
+// Map cmpne(Rs) -> !cmpeq(Rs).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+      (i1 (NOT_p (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>;
+
+// Convert setne back to xor for hexagon since we compute w/ pred registers.
+def : Pat <(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
+      (i1 (XOR_pp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
+
+// Map cmpne(Rss) -> !cmpeq(Rss).
+// rss != rtt -> !(rss == rtt).
+def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+      (i1 (NOT_p (i1 (CMPEHexagon4rr (i64 DoubleRegs:$src1),
+                                     (i64 DoubleRegs:$src2)))))>;
+
+// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+      (i1 (NOT_p (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
+
+// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1).
+def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)),
+      (i1 (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
+
+// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
+// rss >= rtt -> !(rtt > rss).
+def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+      (i1 (NOT_p (i1 (CMPGT64rr (i64 DoubleRegs:$src2),
+                                (i64 DoubleRegs:$src1)))))>;
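+
+// For illustration (a sketch, not actual compiler output): Hexagon has no
+// cmp.ge instruction, so a C comparison such as
+//
+//   int ge(int a, int b) { return a >= b; }
+//
+// is selected through the inverted patterns above, roughly:
+//
+//   p0 = cmp.gt(r1, r0)      // rt > rs
+//   r0 = mux(!p0, #1, #0)    // !(rt > rs) == (rs >= rt)
+//
+// The exact registers and the predicate/mux form depend on the surrounding
+// code; this only shows the inversion the patterns perform.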
+// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
+// rs < rt -> !(rs >= rt).
+def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)),
+      (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1),
+                          (DEC_CONST_SIGNED s8ExtPred:$src2))))>;
+
+// Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs).
+// rs < rt -> rt > rs.
+// We can let the assembler map it, or we can do it in the compiler itself.
+def : Pat <(i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+      (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
+
+// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
+// rss < rtt -> (rtt > rss).
+def : Pat <(i1 (setlt (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+      (i1 (CMPGT64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
+
+// Map from cmpltu(Rs, Rt) -> cmpgtu(Rt, Rs).
+// rs < rt -> rt > rs.
+// We can let the assembler map it, or we can do it in the compiler itself.
+def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+      (i1 (CMPGTUrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
+
+// Map from cmpltu(Rss, Rtt) -> cmpgtu(Rtt, Rss).
+// rss < rtt -> rtt > rss.
+def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+      (i1 (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
+
+// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs).
+def : Pat <(i1 (setuge (i32 IntRegs:$src1), 0)),
+      (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src1)))>;
+
+// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8-1).
+def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)),
+      (i1 (CMPGTUri (i32 IntRegs:$src1), (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>;
+
+// Generate cmpgtu(Rs, #u9).
+def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)),
+      (i1 (CMPGTUri (i32 IntRegs:$src1), u9ExtPred:$src2))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+      (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src2), (i32 IntRegs:$src1))))>;
+
+// Map from Rss >= Rtt -> !(Rtt > Rss).
+// rss >= rtt -> !(rtt > rss).
+def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+      (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>;
+
+// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+      (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
+// Map from (Rss <= Rtt) -> !(Rss > Rtt).
+def : Pat <(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+      (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
+
+// Sign extends.
+// i1 -> i32
+def : Pat <(i32 (sext (i1 PredRegs:$src1))),
+      (i32 (MUX_ii (i1 PredRegs:$src1), -1, 0))>;
+
+// i1 -> i64
+def : Pat <(i64 (sext (i1 PredRegs:$src1))),
+      (i64 (COMBINE_rr (TFRI -1), (MUX_ii (i1 PredRegs:$src1), -1, 0)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)),
+      (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert any-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)),
+      (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i16 -> i64
+def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)),
+      (i64 (SXTW (LDrih ADDRriS11_1:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i32 -> i64
+def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
+      (i64 (SXTW (LDriw ADDRriS11_2:$src1)))>;
+
+
+// Zero extends.
+// i1 -> i32
+def : Pat <(i32 (zext (i1 PredRegs:$src1))),
+      (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>;
+
+// i1 -> i64
+def : Pat <(i64 (zext (i1 PredRegs:$src1))),
+      (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
+      Requires<[NoV4T]>;
+
+// i32 -> i64
+def : Pat <(i64 (zext (i32 IntRegs:$src1))),
+      (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+      Requires<[NoV4T]>;
+
+// i8 -> i64
+def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
+      (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+      Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
+                                s11_0ExtPred:$offset))),
+      (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+                                                 s11_0ExtPred:$offset)))>,
+      Requires<[NoV4T]>;
+
+// i1 -> i64
+def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
+      (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+      Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
+                                s11_0ExtPred:$offset))),
+      (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+                                                 s11_0ExtPred:$offset)))>,
+      Requires<[NoV4T]>;
+
+// i16 -> i64
+def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
+      (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
+      Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
+                                 s11_1ExtPred:$offset))),
+      (i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1,
+                                                 s11_1ExtPred:$offset)))>,
+      Requires<[NoV4T]>;
+
+// i32 -> i64
+def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
+      (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+      Requires<[NoV4T]>;
+
+let AddedComplexity = 100 in
+def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+      (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
+                                                s11_2ExtPred:$offset)))>,
+      Requires<[NoV4T]>;
+
+let AddedComplexity = 10 in
+def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
+      (i32 (LDriw ADDRriS11_0:$src1))>;
+
+// Map from Rs = Pd to Rs = mux(Pd, #1, #0).
+def : Pat <(i32 (anyext (i1 PredRegs:$src1))),
+      (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>;
+
+// Map from Rdd = Pd to Rdd = sxtw(mux(Pd, #1, #0)).
+def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
+      (i64 (SXTW (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))))>;
+
+
+let AddedComplexity = 100 in
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+                            (i32 32))),
+                  (i64 (zextloadi32 (i32 (add IntRegs:$src2,
+                                              s11_2ExtPred:$offset2)))))),
+      (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+                       (LDriw_indexed IntRegs:$src2,
+                                      s11_2ExtPred:$offset2)))>;
+
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+                            (i32 32))),
+                  (i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
+      (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+                       (LDriw ADDRriS11_2:$srcLow)))>;
+
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+                            (i32 32))),
+                  (i64 (zext (i32 IntRegs:$srcLow))))),
+      (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+                       IntRegs:$srcLow))>;
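+
+// For illustration (a sketch, not actual compiler output): C code that
+// splices a 64-bit value out of a high half and a freshly loaded low word,
+//
+//   unsigned long long splice(unsigned long long hi, const unsigned *p) {
+//     return (hi << 32) | *p;
+//   }
+//
+// matches the shl/or patterns above and becomes, roughly,
+//
+//   r2 = memw(r3)            // zextloadi32
+//   r1:0 = combine(r0, r2)   // low word of hi : loaded low word
+//
+// instead of a 64-bit shift followed by a 64-bit or. Register assignment
+// here is hypothetical.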
+// Any extended 64-bit load.
+// anyext i32 -> i64
+def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
+      (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+      Requires<[NoV4T]>;
+
+// When there is an offset, we should prefer the pattern below over the one
+// above. The complexity of the pattern above is 13 (gleaned from
+// HexagonGenDAGISel.inc), so the complexity below is set comfortably higher
+// so that the pattern below is chosen. If this is not done, we generate
+// addresses such as
+// ********************************************
+//    r1 = add (r0, #4)
+//    r1 = memw(r1 + #0)
+// instead of
+//    r1 = memw(r0 + #4)
+// ********************************************
+let AddedComplexity = 100 in
+def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+      (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
+                                                s11_2ExtPred:$offset)))>,
+      Requires<[NoV4T]>;
+
+// anyext i16 -> i64.
+def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
+      (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>,
+      Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
+                                s11_1ExtPred:$offset))),
+      (i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1,
+                                                s11_1ExtPred:$offset)))>,
+      Requires<[NoV4T]>;
+
+// Multiply 64-bit unsigned and use upper result.
+def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
+      (i64
+       (MPYU64_acc
+        (i64
+         (COMBINE_rr
+          (TFRI 0),
+          (i32
+           (EXTRACT_SUBREG
+            (i64
+             (LSRd_ri
+              (i64
+               (MPYU64_acc
+                (i64
+                 (MPYU64_acc
+                  (i64
+                   (COMBINE_rr (TFRI 0),
+                    (i32
+                     (EXTRACT_SUBREG
+                      (i64
+                       (LSRd_ri
+                        (i64
+                         (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+                                                      subreg_loreg)),
+                                 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+                                                      subreg_loreg)))), 32)),
+                      subreg_loreg)))),
+                  (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
+                  (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))),
+                (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+                (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))),
+              32)), subreg_loreg)))),
+        (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
+        (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>;
+
+// Multiply 64-bit signed and use upper result.
+def : Pat <(mulhs (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), + (i64 + (MPY64_acc + (i64 + (COMBINE_rr (TFRI 0), + (i32 + (EXTRACT_SUBREG + (i64 + (LSRd_ri + (i64 + (MPY64_acc + (i64 + (MPY64_acc + (i64 + (COMBINE_rr (TFRI 0), + (i32 + (EXTRACT_SUBREG + (i64 + (LSRd_ri + (i64 + (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), + subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)))), 32)), + subreg_loreg)))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))), + 32)), subreg_loreg)))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>; + +// Hexagon specific ISD nodes. +//def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>; +def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC", + SDTHexagonADJDYNALLOC>; +// Needed to tag these instructions for stack layout. +let usesCustomInserter = 1 in +def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1, + s16Imm:$src2), + "$dst = add($src1, #$src2)", + [(set (i32 IntRegs:$dst), + (Hexagon_ADJDYNALLOC (i32 IntRegs:$src1), + s16ImmPred:$src2))]>; + +def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; +def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>; +def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1", + [(set (i32 IntRegs:$dst), + (Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>; + +let AddedComplexity = 100 in +def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), + (COPY (i32 IntRegs:$src1))>; + +def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>; + +def : Pat<(HexagonWrapperJT tjumptable:$dst), + (i32 (CONST32_set_jt tjumptable:$dst))>; + +// XTYPE/SHIFT + +// Multi-class for logical operators : +// Shift by immediate/register and accumulate/logical +multiclass xtype_imm<string OpcStr, SDNode OpNode1, SDNode OpNode2> { + def _ri : SInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u5Imm:$src3), + !strconcat("$dst ", !strconcat(OpcStr, "($src2, #$src3)")), + [(set (i32 IntRegs:$dst), + (OpNode2 (i32 IntRegs:$src1), + (OpNode1 (i32 IntRegs:$src2), + u5ImmPred:$src3)))], + "$src1 = $dst">; + + def d_ri : SInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, u6Imm:$src3), + !strconcat("$dst ", !strconcat(OpcStr, "($src2, #$src3)")), + [(set (i64 DoubleRegs:$dst), (OpNode2 (i64 DoubleRegs:$src1), + (OpNode1 (i64 DoubleRegs:$src2), u6ImmPred:$src3)))], + "$src1 = $dst">; +} + +// Multi-class for logical operators : +// Shift by register and accumulate/logical (32/64 bits) +multiclass xtype_reg<string OpcStr, SDNode OpNode1, SDNode OpNode2> { + def _rr : SInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), + !strconcat("$dst ", !strconcat(OpcStr, "($src2, $src3)")), + [(set (i32 IntRegs:$dst), + (OpNode2 (i32 IntRegs:$src1), + (OpNode1 (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], + "$src1 = $dst">; + + def d_rr : SInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), + !strconcat("$dst ", !strconcat(OpcStr, "($src2, $src3)")), + [(set (i64 DoubleRegs:$dst), + (OpNode2 (i64 DoubleRegs:$src1), + 
(OpNode1 (i64 DoubleRegs:$src2), + (i32 IntRegs:$src3))))], + "$src1 = $dst">; + +} + +multiclass basic_xtype_imm<string OpcStr, SDNode OpNode> { +let AddedComplexity = 100 in + defm _ADD : xtype_imm< !strconcat("+= ", OpcStr), OpNode, add>; + defm _SUB : xtype_imm< !strconcat("-= ", OpcStr), OpNode, sub>; + defm _AND : xtype_imm< !strconcat("&= ", OpcStr), OpNode, and>; + defm _OR : xtype_imm< !strconcat("|= ", OpcStr), OpNode, or>; +} + +multiclass basic_xtype_reg<string OpcStr, SDNode OpNode> { +let AddedComplexity = 100 in + defm _ADD : xtype_reg< !strconcat("+= ", OpcStr), OpNode, add>; + defm _SUB : xtype_reg< !strconcat("-= ", OpcStr), OpNode, sub>; + defm _AND : xtype_reg< !strconcat("&= ", OpcStr), OpNode, and>; + defm _OR : xtype_reg< !strconcat("|= ", OpcStr), OpNode, or>; +} + +multiclass xtype_xor_imm<string OpcStr, SDNode OpNode> { +let AddedComplexity = 100 in + defm _XOR : xtype_imm< !strconcat("^= ", OpcStr), OpNode, xor>; +} + +defm ASL : basic_xtype_imm<"asl", shl>, basic_xtype_reg<"asl", shl>, + xtype_xor_imm<"asl", shl>; + +defm LSR : basic_xtype_imm<"lsr", srl>, basic_xtype_reg<"lsr", srl>, + xtype_xor_imm<"lsr", srl>; + +defm ASR : basic_xtype_imm<"asr", sra>, basic_xtype_reg<"asr", sra>; +defm LSL : basic_xtype_reg<"lsl", shl>; + +// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) +def : Pat <(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)), + (i32 (MPYI_rin (i32 IntRegs:$src1), u8ImmPred:$src2))>; + +//===----------------------------------------------------------------------===// +// V3 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV3.td" + +//===----------------------------------------------------------------------===// +// V3 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V4 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV4.td" + +//===----------------------------------------------------------------------===// +// V4 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V5 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV5.td" + +//===----------------------------------------------------------------------===// +// V5 Instructions - +//===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td new file mode 100644 index 000000000000..7e75554e7fcd --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -0,0 +1,107 @@ +//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V3 instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +def callv3 : SDNode<"HexagonISD::CALLv3", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +//===----------------------------------------------------------------------===// +// J + +//===----------------------------------------------------------------------===// +// Call subroutine. +let isCall = 1, neverHasSideEffects = 1, + Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31, + P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { + def CALLv3 : JInst<(outs), (ins calltarget:$dst), + "call $dst", []>, Requires<[HasV3T]>; +} + +//===----------------------------------------------------------------------===// +// J - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// JR + +//===----------------------------------------------------------------------===// +// Call subroutine from register. +let isCall = 1, neverHasSideEffects = 1, + Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31, + P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { + def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst), + "callr $dst", + []>, Requires<[HasV3TOnly]>; + } + +//===----------------------------------------------------------------------===// +// JR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/ALU + +//===----------------------------------------------------------------------===// + +let AddedComplexity = 200 in +def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = max($src2, $src1)", + [(set (i64 DoubleRegs:$dst), + (i64 (select (i1 (setlt (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1))), + (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2))))]>, +Requires<[HasV3T]>; + +let AddedComplexity = 200 in +def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = min($src2, $src1)", + [(set (i64 DoubleRegs:$dst), + (i64 (select (i1 (setgt (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1))), + (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2))))]>, +Requires<[HasV3T]>; + +//===----------------------------------------------------------------------===// +// ALU64/ALU - +//===----------------------------------------------------------------------===// + + + + +//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; + +//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; + +//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; + +//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; + +//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset), +// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; + + +// Map call instruction +def : Pat<(call (i32 IntRegs:$dst)), + (CALLRv3 (i32 IntRegs:$dst))>, Requires<[HasV3T]>; +def : Pat<(call tglobaladdr:$dst), + (CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>; +def : 
Pat<(call texternalsym:$dst), + (CALLv3 texternalsym:$dst)>, Requires<[HasV3T]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td new file mode 100644 index 000000000000..475c23d98bf7 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -0,0 +1,3392 @@ +//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V4 instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +let neverHasSideEffects = 1 in +class T_Immext<dag ins> : + EXTENDERInst<(outs), ins, "immext(#$imm)", []>, + Requires<[HasV4T]>; + +def IMMEXT_b : T_Immext<(ins brtarget:$imm)>; +def IMMEXT_c : T_Immext<(ins calltarget:$imm)>; +def IMMEXT_g : T_Immext<(ins globaladdress:$imm)>; +def IMMEXT_i : T_Immext<(ins u26_6Imm:$imm)>; + +// Fold (add (CONST32 tglobaladdr:$addr) <offset>) into a global address. +def FoldGlobalAddr : ComplexPattern<i32, 1, "foldGlobalAddress", [], []>; + +// Fold (add (CONST32_GP tglobaladdr:$addr) <offset>) into a global address. +def FoldGlobalAddrGP : ComplexPattern<i32, 1, "foldGlobalAddressGP", [], []>; + +def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr), + (HexagonCONST32 node:$addr), [{ + return hasNumUsesBelowThresGA(N->getOperand(0).getNode()); +}]>; + +// Hexagon V4 Architecture spec defines 8 instruction classes: +// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the +// compiler) + +// LD Instructions: +// ======================================== +// Loads (8/16/32/64 bit) +// Deallocframe + +// ST Instructions: +// ======================================== +// Stores (8/16/32/64 bit) +// Allocframe + +// ALU32 Instructions: +// ======================================== +// Arithmetic / Logical (32 bit) +// Vector Halfword + +// XTYPE Instructions (32/64 bit): +// ======================================== +// Arithmetic, Logical, Bit Manipulation +// Multiply (Integer, Fractional, Complex) +// Permute / Vector Permute Operations +// Predicate Operations +// Shift / Shift with Add/Sub/Logical +// Vector Byte ALU +// Vector Halfword (ALU, Shift, Multiply) +// Vector Word (ALU, Shift) + +// J Instructions: +// ======================================== +// Jump/Call PC-relative + +// JR Instructions: +// ======================================== +// Jump/Call Register + +// MEMOP Instructions: +// ======================================== +// Operation on memory (8/16/32 bit) + +// NV Instructions: +// ======================================== +// New-value Jumps +// New-value Stores + +// CR Instructions: +// ======================================== +// Control-Register Transfers +// Hardware Loop Setup +// Predicate Logicals & Reductions + +// SYSTEM Instructions (not implemented in the compiler): +// ======================================== +// Prefetch +// Cache Maintenance +// Bus Operations + + +//===----------------------------------------------------------------------===// +// ALU32 + +//===----------------------------------------------------------------------===// +// Generate frame index addresses. 
+let neverHasSideEffects = 1, isReMaterializable = 1, +isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in +def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s32Imm:$offset), + "$dst = add($src1, ##$offset)", + []>, + Requires<[HasV4T]>; + +// Rd=cmp.eq(Rs,#s8) +let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2, +isExtentSigned = 1, opExtentBits = 8 in +def V4_A4_rcmpeqi : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, s8Ext:$s8), + "$Rd = cmp.eq($Rs, #$s8)", + [(set (i32 IntRegs:$Rd), + (i32 (zext (i1 (seteq (i32 IntRegs:$Rs), + s8ExtPred:$s8)))))]>, + Requires<[HasV4T]>; + +// Preserve the TSTBIT generation +def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))), + (i32 IntRegs:$src1))), 0)))), + (i32 (MUX_ii (i1 (TSTBIT_rr (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + 1, 0))>; + +// Interfered with tstbit generation, above pattern preserves, see : tstbit.ll +// Rd=cmp.ne(Rs,#s8) +let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2, +isExtentSigned = 1, opExtentBits = 8 in +def V4_A4_rcmpneqi : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, s8Ext:$s8), + "$Rd = !cmp.eq($Rs, #$s8)", + [(set (i32 IntRegs:$Rd), + (i32 (zext (i1 (setne (i32 IntRegs:$Rs), + s8ExtPred:$s8)))))]>, + Requires<[HasV4T]>; + +// Rd=cmp.eq(Rs,Rt) +let validSubTargets = HasV4SubT in +def V4_A4_rcmpeq : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = cmp.eq($Rs, $Rt)", + [(set (i32 IntRegs:$Rd), + (i32 (zext (i1 (seteq (i32 IntRegs:$Rs), + IntRegs:$Rt)))))]>, + Requires<[HasV4T]>; + +// Rd=cmp.ne(Rs,Rt) +let validSubTargets = HasV4SubT in +def V4_A4_rcmpneq : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = !cmp.eq($Rs, $Rt)", + [(set (i32 IntRegs:$Rd), + (i32 (zext (i1 (setne (i32 IntRegs:$Rs), + IntRegs:$Rt)))))]>, + Requires<[HasV4T]>; + +//===----------------------------------------------------------------------===// +// ALU32 - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU32/PERM + +//===----------------------------------------------------------------------===// + +// Combine +// Rdd=combine(Rs, #s8) +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8, + neverHasSideEffects = 1, validSubTargets = HasV4SubT in +def COMBINE_rI_V4 : ALU32_ri<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, s8Ext:$src2), + "$dst = combine($src1, #$src2)", + []>, + Requires<[HasV4T]>; + +// Rdd=combine(#s8, Rs) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8, + neverHasSideEffects = 1, validSubTargets = HasV4SubT in +def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst), + (ins s8Ext:$src1, IntRegs:$src2), + "$dst = combine(#$src1, $src2)", + []>, + Requires<[HasV4T]>; + +def HexagonWrapperCombineRI_V4 : + SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>; +def HexagonWrapperCombineIR_V4 : + SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>; + +def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i), + (COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>, + Requires<[HasV4T]>; + +def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r), + (COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>, + Requires<[HasV4T]>; + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6, + neverHasSideEffects = 1, validSubTargets = HasV4SubT in +def COMBINE_iI_V4 : ALU32_ii<(outs 
+            (ins s8Imm:$src1, u6Ext:$src2),
+            "$dst = combine(#$src1, #$src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Template class for load instructions with Absolute set addressing mode.
+//===----------------------------------------------------------------------===//
+let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
+validSubTargets = HasV4SubT, addrMode = AbsoluteSet in
+class T_LD_abs_set<string mnemonic, RegisterClass RC>:
+            LDInst2<(outs RC:$dst1, IntRegs:$dst2),
+            (ins u0AlwaysExt:$addr),
+            "$dst1 = "#mnemonic#"($dst2=##$addr)",
+            []>,
+            Requires<[HasV4T]>;
+
+def LDrid_abs_set_V4  : T_LD_abs_set <"memd", DoubleRegs>;
+def LDrib_abs_set_V4  : T_LD_abs_set <"memb", IntRegs>;
+def LDriub_abs_set_V4 : T_LD_abs_set <"memub", IntRegs>;
+def LDrih_abs_set_V4  : T_LD_abs_set <"memh", IntRegs>;
+def LDriw_abs_set_V4  : T_LD_abs_set <"memw", IntRegs>;
+def LDriuh_abs_set_V4 : T_LD_abs_set <"memuh", IntRegs>;
+
+
+// multiclass for load instructions with base + register offset
+// addressing mode
+multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
+                             bit isPredNew> {
+  let isPredicatedNew = isPredNew in
+  def NAME : LDInst2<(outs RC:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset),
+            !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+            ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$offset)",
+            []>, Requires<[HasV4T]>;
+}
+
+multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> {
+  let isPredicatedFalse = PredNot in {
+    defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>;
+    // Predicate new
+    defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>;
+  }
+}
+
+let neverHasSideEffects = 1 in
+multiclass ld_idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
+  let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+    let isPredicable = 1 in
+    def NAME#_V4 : LDInst2<(outs RC:$dst),
+            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+            "$dst = "#mnemonic#"($src1+$src2<<#$offset)",
+            []>, Requires<[HasV4T]>;
+
+    let isPredicated = 1 in {
+      defm Pt_V4 : ld_idxd_shl_pred<mnemonic, RC, 0 >;
+      defm NotPt_V4 : ld_idxd_shl_pred<mnemonic, RC, 1>;
+    }
+  }
+}
+
+let addrMode = BaseRegOffset in {
+  let accessSize = ByteAccess in {
+    defm LDrib_indexed_shl: ld_idxd_shl<"memb", "LDrib", IntRegs>,
+                            AddrModeRel;
+    defm LDriub_indexed_shl: ld_idxd_shl<"memub", "LDriub", IntRegs>,
+                             AddrModeRel;
+  }
+  let accessSize = HalfWordAccess in {
+    defm LDrih_indexed_shl: ld_idxd_shl<"memh", "LDrih", IntRegs>, AddrModeRel;
+    defm LDriuh_indexed_shl: ld_idxd_shl<"memuh", "LDriuh", IntRegs>,
+                             AddrModeRel;
+  }
+  let accessSize = WordAccess in
+  defm LDriw_indexed_shl: ld_idxd_shl<"memw", "LDriw", IntRegs>, AddrModeRel;
+
+  let accessSize = DoubleWordAccess in
+  defm LDrid_indexed_shl: ld_idxd_shl<"memd", "LDrid", DoubleRegs>,
+                          AddrModeRel;
+}
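The ld_idxd_shl definitions above give loads a base + (register << #u2) addressing mode, so a scaled array index folds straight into the load. A small C sketch of the shape these patterns match (not part of the patch; registers illustrative):

    /* base[idx] forms the address base + (idx << 2), which can select as:
           r0 = memw(r0+r1<<#2)                                          */
    int get(int *base, int idx) {
        return base[idx];
    }

+// 'def pats' for load instructions with base + register offset and non-zero
+// immediate value. Immediate value is used to left-shift the second
+// register operand.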
+let AddedComplexity = 40 in { +def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, + (shl IntRegs:$src2, u2ImmPred:$offset)))), + (LDrib_indexed_shl_V4 IntRegs:$src1, + IntRegs:$src2, u2ImmPred:$offset)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, + (shl IntRegs:$src2, u2ImmPred:$offset)))), + (LDriub_indexed_shl_V4 IntRegs:$src1, + IntRegs:$src2, u2ImmPred:$offset)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (extloadi8 (add IntRegs:$src1, + (shl IntRegs:$src2, u2ImmPred:$offset)))), + (LDriub_indexed_shl_V4 IntRegs:$src1, + IntRegs:$src2, u2ImmPred:$offset)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, + (shl IntRegs:$src2, u2ImmPred:$offset)))), + (LDrih_indexed_shl_V4 IntRegs:$src1, + IntRegs:$src2, u2ImmPred:$offset)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, + (shl IntRegs:$src2, u2ImmPred:$offset)))), + (LDriuh_indexed_shl_V4 IntRegs:$src1, + IntRegs:$src2, u2ImmPred:$offset)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (extloadi16 (add IntRegs:$src1, + (shl IntRegs:$src2, u2ImmPred:$offset)))), + (LDriuh_indexed_shl_V4 IntRegs:$src1, + IntRegs:$src2, u2ImmPred:$offset)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (load (add IntRegs:$src1, + (shl IntRegs:$src2, u2ImmPred:$offset)))), + (LDriw_indexed_shl_V4 IntRegs:$src1, + IntRegs:$src2, u2ImmPred:$offset)>, + Requires<[HasV4T]>; + +def : Pat <(i64 (load (add IntRegs:$src1, + (shl IntRegs:$src2, u2ImmPred:$offset)))), + (LDrid_indexed_shl_V4 IntRegs:$src1, + IntRegs:$src2, u2ImmPred:$offset)>, + Requires<[HasV4T]>; +} + + +// 'def pats' for load instruction base + register offset and +// zero immediate value. +let AddedComplexity = 10 in { +def : Pat <(i64 (load (add IntRegs:$src1, IntRegs:$src2))), + (LDrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, IntRegs:$src2))), + (LDrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, IntRegs:$src2))), + (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (extloadi8 (add IntRegs:$src1, IntRegs:$src2))), + (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, IntRegs:$src2))), + (LDrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, IntRegs:$src2))), + (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (extloadi16 (add IntRegs:$src1, IntRegs:$src2))), + (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))), + (LDriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; +} + +// zext i1->i64 +def : Pat <(i64 (zext (i1 PredRegs:$src1))), + (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>, + Requires<[HasV4T]>; + +// zext i32->i64 +def : Pat <(i64 (zext (i32 IntRegs:$src1))), + (i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>, + Requires<[HasV4T]>; +// zext i8->i64 +def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1), + s11_0ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1, + s11_0ExtPred:$offset)))>, + Requires<[HasV4T]>; + 
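The patterns above and the i64 patterns that follow implement zero-extension without a dedicated widening instruction: the low word is loaded or computed normally and COMBINE_Ir_V4 supplies a zero upper word. A rough C counterpart of the zextloadi8 case (not part of the patch; register assignment illustrative):

    /* selects roughly:  r0 = memub(r0+#0);  r1:0 = combine(#0, r0)      */
    unsigned long long widen(unsigned char *p) {
        return *p;
    }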
+// zext i1->i64 +def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1), + s11_0ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1, + s11_0ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// zext i16->i64 +def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1), + s11_1ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1, + s11_1ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// anyext i16->i64 +def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_2:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1), + s11_1ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1, + s11_1ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// zext i32->i64 +def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 100 in +def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// anyext i32->i64 +def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 100 in +def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[HasV4T]>; + + + +//===----------------------------------------------------------------------===// +// LD - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ST + +//===----------------------------------------------------------------------===// +/// +//===----------------------------------------------------------------------===// +// Template class for store instructions with Absolute set addressing mode. 
+//===----------------------------------------------------------------------===// +let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT, +addrMode = AbsoluteSet in +class T_ST_abs_set<string mnemonic, RegisterClass RC>: + STInst2<(outs IntRegs:$dst1), + (ins RC:$src1, u0AlwaysExt:$src2), + mnemonic#"($dst1=##$src2) = $src1", + []>, + Requires<[HasV4T]>; + +def STrid_abs_set_V4 : T_ST_abs_set <"memd", DoubleRegs>; +def STrib_abs_set_V4 : T_ST_abs_set <"memb", IntRegs>; +def STrih_abs_set_V4 : T_ST_abs_set <"memh", IntRegs>; +def STriw_abs_set_V4 : T_ST_abs_set <"memw", IntRegs>; + +//===----------------------------------------------------------------------===// +// multiclass for store instructions with base + register offset addressing +// mode +//===----------------------------------------------------------------------===// +multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + RC:$src5), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; +} + +multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>; + // Predicate new + defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>; + } +} + +let isNVStorable = 1 in +multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { + let isPredicable = 1 in + def NAME#_V4 : STInst2<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4), + mnemonic#"($src1+$src2<<#$src3) = $src4", + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in { + defm Pt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 0 >; + defm NotPt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 1>; + } + } +} + +// multiclass for new-value store instructions with base + register offset +// addressing mode. 
+multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + RC:$src5), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; +} + +multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>; + // Predicate new + defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>; + } +} + +let mayStore = 1, isNVStore = 1 in +multiclass ST_Idxd_shl_nv<string mnemonic, string CextOp, RegisterClass RC> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { + let isPredicable = 1 in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4), + mnemonic#"($src1+$src2<<#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in { + defm Pt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 0 >; + defm NotPt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 1>; + } + } +} + +let addrMode = BaseRegOffset, neverHasSideEffects = 1, +validSubTargets = HasV4SubT in { + let accessSize = ByteAccess in + defm STrib_indexed_shl: ST_Idxd_shl<"memb", "STrib", IntRegs>, + ST_Idxd_shl_nv<"memb", "STrib", IntRegs>, AddrModeRel; + + let accessSize = HalfWordAccess in + defm STrih_indexed_shl: ST_Idxd_shl<"memh", "STrih", IntRegs>, + ST_Idxd_shl_nv<"memh", "STrih", IntRegs>, AddrModeRel; + + let accessSize = WordAccess in + defm STriw_indexed_shl: ST_Idxd_shl<"memw", "STriw", IntRegs>, + ST_Idxd_shl_nv<"memw", "STriw", IntRegs>, AddrModeRel; + + let isNVStorable = 0, accessSize = DoubleWordAccess in + defm STrid_indexed_shl: ST_Idxd_shl<"memd", "STrid", DoubleRegs>, AddrModeRel; +} + +let Predicates = [HasV4T], AddedComplexity = 10 in { +def : Pat<(truncstorei8 (i32 IntRegs:$src4), + (add IntRegs:$src1, (shl IntRegs:$src2, + u2ImmPred:$src3))), + (STrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, + u2ImmPred:$src3, IntRegs:$src4)>; + +def : Pat<(truncstorei16 (i32 IntRegs:$src4), + (add IntRegs:$src1, (shl IntRegs:$src2, + u2ImmPred:$src3))), + (STrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, + u2ImmPred:$src3, IntRegs:$src4)>; + +def : Pat<(store (i32 IntRegs:$src4), + (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))), + (STriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, + u2ImmPred:$src3, IntRegs:$src4)>; + +def : Pat<(store (i64 DoubleRegs:$src4), + (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))), + (STrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, + u2ImmPred:$src3, DoubleRegs:$src4)>; +} + +let isExtended = 1, opExtendable = 2 in +class T_ST_LongOff <string mnemonic, PatFrag stOp, RegisterClass RC, ValueType VT> : + STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, RC:$src4), + mnemonic#"($src1<<#$src2+##$src3) = $src4", + [(stOp (VT RC:$src4), + (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), + u0AlwaysExtPred:$src3))]>, + Requires<[HasV4T]>; + +let isExtended = 1, opExtendable = 2, mayStore = 1, isNVStore = 1 in +class T_ST_LongOff_nv <string mnemonic> : + NVInst_V4<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), + mnemonic#"($src1<<#$src2+##$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +multiclass ST_LongOff <string mnemonic, string BaseOp, PatFrag stOp> { + let BaseOpcode = BaseOp#"_shl" in { 
+    let isNVStorable = 1 in
+    def NAME#_V4 : T_ST_LongOff<mnemonic, stOp, IntRegs, i32>;
+
+    def NAME#_nv_V4 : T_ST_LongOff_nv<mnemonic>;
+  }
+}
+
+let AddedComplexity = 10, validSubTargets = HasV4SubT in {
+  def STrid_shl_V4 : T_ST_LongOff<"memd", store, DoubleRegs, i64>;
+  defm STrib_shl : ST_LongOff <"memb", "STrib", truncstorei8>, NewValueRel;
+  defm STrih_shl : ST_LongOff <"memh", "STrih", truncstorei16>, NewValueRel;
+  defm STriw_shl : ST_LongOff <"memw", "STriw", store>, NewValueRel;
+}
+
+let AddedComplexity = 40 in
+multiclass T_ST_LOff_Pats <InstHexagon I, RegisterClass RC, ValueType VT,
+                           PatFrag stOp> {
+  def : Pat<(stOp (VT RC:$src4),
+                  (add (shl IntRegs:$src1, u2ImmPred:$src2),
+                       (NumUsesBelowThresCONST32 tglobaladdr:$src3))),
+            (I IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
+
+  def : Pat<(stOp (VT RC:$src4),
+                  (add IntRegs:$src1,
+                       (NumUsesBelowThresCONST32 tglobaladdr:$src3))),
+            (I IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
+}
+
+defm : T_ST_LOff_Pats<STrid_shl_V4, DoubleRegs, i64, store>;
+defm : T_ST_LOff_Pats<STriw_shl_V4, IntRegs, i32, store>;
+defm : T_ST_LOff_Pats<STrib_shl_V4, IntRegs, i32, truncstorei8>;
+defm : T_ST_LOff_Pats<STrih_shl_V4, IntRegs, i32, truncstorei16>;
+
+// memd(Rx++#s4:3)=Rtt
+// memd(Rx++#s4:3:circ(Mu))=Rtt
+// memd(Rx++I:circ(Mu))=Rtt
+// memd(Rx++Mu)=Rtt
+// memd(Rx++Mu:brev)=Rtt
+// memd(gp+#u16:3)=Rtt
+
+// Store doubleword conditionally.
+// if ([!]Pv[.new]) memd(#u6)=Rtt
+// TODO: needs to be implemented.
+
+//===----------------------------------------------------------------------===//
+// multiclass for store instructions with base + immediate offset
+// addressing mode and immediate stored value.
+// mem[bhw](Rx++#s4:3)=#s8
+// if ([!]Pv[.new]) mem[bhw](Rx++#s4:3)=#s6
+//===----------------------------------------------------------------------===//
+multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
+                        bit isPredNew> {
+  let isPredicatedNew = isPredNew in
+  def NAME : STInst2<(outs),
+             (ins PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4),
+             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+             ") ")#mnemonic#"($src2+#$src3) = #$src4",
+             []>,
+             Requires<[HasV4T]>;
+}
+
+multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> {
+  let isPredicatedFalse = PredNot in {
+    defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>;
+    // Predicate new
+    defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>;
+  }
+}
+
+let isExtendable = 1, isExtentSigned = 1, neverHasSideEffects = 1 in
+multiclass ST_Imm<string mnemonic, string CextOp, Operand OffsetOp> {
+  let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in {
+    let opExtendable = 2, opExtentBits = 8, isPredicable = 1 in
+    def NAME#_V4 : STInst2<(outs),
+                   (ins IntRegs:$src1, OffsetOp:$src2, s8Ext:$src3),
+                   mnemonic#"($src1+#$src2) = #$src3",
+                   []>,
+                   Requires<[HasV4T]>;
+
+    let opExtendable = 3, opExtentBits = 6, isPredicated = 1 in {
+      defm Pt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 0>;
+      defm NotPt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 1 >;
+    }
+  }
+}
+
+let addrMode = BaseImmOffset, InputType = "imm",
+validSubTargets = HasV4SubT in {
+  let accessSize = ByteAccess in
+  defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel, PredNewRel;
+
+  let accessSize = HalfWordAccess in
+  defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel, PredNewRel;
+
+  let accessSize = WordAccess in
+  defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel, PredNewRel;
+}
+
+let Predicates = [HasV4T],
AddedComplexity = 10 in { +def: Pat<(truncstorei8 s8ExtPred:$src3, (add IntRegs:$src1, u6_0ImmPred:$src2)), + (STrib_imm_V4 IntRegs:$src1, u6_0ImmPred:$src2, s8ExtPred:$src3)>; + +def: Pat<(truncstorei16 s8ExtPred:$src3, (add IntRegs:$src1, + u6_1ImmPred:$src2)), + (STrih_imm_V4 IntRegs:$src1, u6_1ImmPred:$src2, s8ExtPred:$src3)>; + +def: Pat<(store s8ExtPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2)), + (STriw_imm_V4 IntRegs:$src1, u6_2ImmPred:$src2, s8ExtPred:$src3)>; +} + +let AddedComplexity = 6 in +def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)), + (STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>, + Requires<[HasV4T]>; + +// memb(Rx++#s4:0:circ(Mu))=Rt +// memb(Rx++I:circ(Mu))=Rt +// memb(Rx++Mu)=Rt +// memb(Rx++Mu:brev)=Rt +// memb(gp+#u16:0)=Rt + + +// Store halfword. +// TODO: needs to be implemented +// memh(Re=#U6)=Rt.H +// memh(Rs+#s11:1)=Rt.H +let AddedComplexity = 6 in +def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)), + (STrih_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>, + Requires<[HasV4T]>; + +// memh(Rs+Ru<<#u2)=Rt.H +// TODO: needs to be implemented. + +// memh(Ru<<#u2+#U6)=Rt.H +// memh(Rx++#s4:1:circ(Mu))=Rt.H +// memh(Rx++#s4:1:circ(Mu))=Rt +// memh(Rx++I:circ(Mu))=Rt.H +// memh(Rx++I:circ(Mu))=Rt +// memh(Rx++Mu)=Rt.H +// memh(Rx++Mu)=Rt +// memh(Rx++Mu:brev)=Rt.H +// memh(Rx++Mu:brev)=Rt +// memh(gp+#u16:1)=Rt +// if ([!]Pv[.new]) memh(#u6)=Rt.H +// if ([!]Pv[.new]) memh(#u6)=Rt + + +// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H +// TODO: needs to be implemented. + +// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H +// TODO: Needs to be implemented. + +// Store word. +// memw(Re=#U6)=Rt +// TODO: Needs to be implemented. + +// Store predicate: +let neverHasSideEffects = 1 in +def STriw_pred_V4 : STInst2<(outs), + (ins MEMri:$addr, PredRegs:$src1), + "Error; should not emit", + []>, + Requires<[HasV4T]>; + +let AddedComplexity = 6 in +def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)), + (STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>, + Requires<[HasV4T]>; + +// memw(Rx++#s4:2)=Rt +// memw(Rx++#s4:2:circ(Mu))=Rt +// memw(Rx++I:circ(Mu))=Rt +// memw(Rx++Mu)=Rt +// memw(Rx++Mu:brev)=Rt + +//===----------------------------------------------------------------------=== +// ST - +//===----------------------------------------------------------------------=== + + +//===----------------------------------------------------------------------===// +// NV/ST + +//===----------------------------------------------------------------------===// + +// multiclass for new-value store instructions with base + immediate offset. 
+// +multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC, + Operand predImmOp, bit isNot, bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; +} + +multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp, + bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>; + // Predicate new + defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>; + } +} + +let mayStore = 1, isNVStore = 1, neverHasSideEffects = 1, isExtendable = 1 in +multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, Operand predImmOp, bits<5> ImmBits, + bits<5> PredImmBits> { + + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { + let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits, + isPredicable = 1 in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1+#$src2) = $src3.new", + []>, + Requires<[HasV4T]>; + + let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits, + isPredicated = 1 in { + defm Pt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 0>; + defm NotPt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 1>; + } + } +} + +let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in { + let accessSize = ByteAccess in + defm STrib_indexed: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext, + u6_0Ext, 11, 6>, AddrModeRel; + + let accessSize = HalfWordAccess in + defm STrih_indexed: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext, + u6_1Ext, 12, 7>, AddrModeRel; + + let accessSize = WordAccess in + defm STriw_indexed: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext, + u6_2Ext, 13, 8>, AddrModeRel; +} + +// multiclass for new-value store instructions with base + immediate offset. +// and MEMri operand. 
+multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, RC: $src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($addr) = $src2.new", + []>, + Requires<[HasV4T]>; +} + +multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>; + + // Predicate new + defm _cdn#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 1>; + } +} + +let mayStore = 1, isNVStore = 1, isExtendable = 1, neverHasSideEffects = 1 in +multiclass ST_MEMri_nv<string mnemonic, string CextOp, RegisterClass RC, + bits<5> ImmBits, bits<5> PredImmBits> { + + let CextOpcode = CextOp, BaseOpcode = CextOp in { + let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits, + isPredicable = 1 in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins MEMri:$addr, RC:$src), + mnemonic#"($addr) = $src.new", + []>, + Requires<[HasV4T]>; + + let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits, + neverHasSideEffects = 1, isPredicated = 1 in { + defm Pt : ST_MEMri_Pred_nv<mnemonic, RC, 0>; + defm NotPt : ST_MEMri_Pred_nv<mnemonic, RC, 1>; + } + } +} + +let addrMode = BaseImmOffset, isMEMri = "true", validSubTargets = HasV4SubT, +mayStore = 1 in { + let accessSize = ByteAccess in + defm STrib: ST_MEMri_nv<"memb", "STrib", IntRegs, 11, 6>, AddrModeRel; + + let accessSize = HalfWordAccess in + defm STrih: ST_MEMri_nv<"memh", "STrih", IntRegs, 12, 7>, AddrModeRel; + + let accessSize = WordAccess in + defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel; +} + +//===----------------------------------------------------------------------===// +// Post increment store +// mem[bhwd](Rx++#s4:[0123])=Nt.new +//===----------------------------------------------------------------------===// + +multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp, + bit isNot, bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2++#$offset) = $src3.new", + [], + "$src2 = $dst">, + Requires<[HasV4T]>; +} + +multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC, + Operand ImmOp, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>; + // Predicate new + let Predicates = [HasV4T], validSubTargets = HasV4SubT in + defm _cdn#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 1>; + } +} + +let hasCtrlDep = 1, isNVStore = 1, neverHasSideEffects = 1 in +multiclass ST_PostInc_nv<string mnemonic, string BaseOp, RegisterClass RC, + Operand ImmOp> { + + let BaseOpcode = "POST_"#BaseOp in { + let isPredicable = 1 in + def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins IntRegs:$src1, ImmOp:$offset, RC:$src2), + mnemonic#"($src1++#$offset) = $src2.new", + [], + "$src1 = $dst">, + Requires<[HasV4T]>; + + let isPredicated = 1 in { + defm Pt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 0 >; + defm NotPt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 1 >; + } + } +} + +let addrMode = PostInc, validSubTargets = HasV4SubT in { +defm POST_STbri: ST_PostInc_nv <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel; +defm POST_SThri: ST_PostInc_nv <"memh", 
"STrih", IntRegs, s4_1Imm>, AddrModeRel; +defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; +} + +// memb(Rx++#s4:0:circ(Mu))=Nt.new +// memb(Rx++I:circ(Mu))=Nt.new +// memb(Rx++Mu)=Nt.new +// memb(Rx++Mu:brev)=Nt.new +// memh(Rx++#s4:1:circ(Mu))=Nt.new +// memh(Rx++I:circ(Mu))=Nt.new +// memh(Rx++Mu)=Nt.new +// memh(Rx++Mu:brev)=Nt.new + +// memw(Rx++#s4:2:circ(Mu))=Nt.new +// memw(Rx++I:circ(Mu))=Nt.new +// memw(Rx++Mu)=Nt.new +// memw(Rx++Mu:brev)=Nt.new + +//===----------------------------------------------------------------------===// +// NV/ST - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// NV/J + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// multiclass/template class for the new-value compare jumps with the register +// operands. +//===----------------------------------------------------------------------===// + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in +class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum, + bit isNegCond, bit isTaken> + : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), + "if ("#!if(isNegCond, "!","")#mnemonic# + "($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")# + "$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:" + #!if(isTaken, "t","nt")#" $offset", + []>, Requires<[HasV4T]> { + + bits<5> src1; + bits<5> src2; + bits<3> Ns; // New-Value Operand + bits<5> RegOp; // Non New-Value Operand + bits<11> offset; + + let isBrTaken = !if(isTaken, "true", "false"); + let isPredicatedFalse = isNegCond; + + let Ns = !if(!eq(NvOpNum, 0), src1{2-0}, src2{2-0}); + let RegOp = !if(!eq(NvOpNum, 0), src2, src1); + + let IClass = 0b0010; + let Inst{26} = 0b0; + let Inst{25-23} = majOp; + let Inst{22} = isNegCond; + let Inst{18-16} = Ns; + let Inst{13} = isTaken; + let Inst{12-8} = RegOp; + let Inst{21-20} = offset{10-9}; + let Inst{7-1} = offset{8-2}; +} + + +multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum, + bit isNegCond> { + // Branch not taken: + def _nt_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>; + // Branch taken: + def _t_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>; +} + +// NvOpNum = 0 -> First Operand is a new-value Register +// NvOpNum = 1 -> Second Operand is a new-value Register + +multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp, + bit NvOpNum> { + let BaseOpcode = BaseOp#_NVJ in { + defm _t_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond + defm _f_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond + } +} + +// if ([!]cmp.eq(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gtu(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Rt,Ns.new)) jump:[n]t #r9:2 +// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2 + +let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, + Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in { + defm CMPEQrr : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel; + defm CMPGTrr : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel; + defm CMPGTUrr : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel; + defm CMPLTrr : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel; + defm CMPLTUrr : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel; +} + 
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jump instructions
+// with a register and an unsigned immediate (U5) operand.
+//===----------------------------------------------------------------------===//
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in
+class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
+                     bit isTaken>
+  : NVInst_V4<(outs),
+    (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+    "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:"
+    #!if(isTaken, "t","nt")#" $offset",
+    []>, Requires<[HasV4T]> {
+
+  let isPredicatedFalse = isNegCond;
+  let isBrTaken = !if(isTaken, "true", "false");
+
+  bits<3> src1;
+  bits<5> src2;
+  bits<11> offset;
+
+  let IClass = 0b0010;
+  let Inst{26} = 0b1;
+  let Inst{25-23} = majOp;
+  let Inst{22} = isNegCond;
+  let Inst{18-16} = src1;
+  let Inst{13} = isTaken;
+  let Inst{12-8} = src2;
+  let Inst{21-20} = offset{10-9};
+  let Inst{7-1} = offset{8-2};
+}
+
+multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> {
+  // Branch not taken:
+  def _nt_V4: NVJri_template<mnemonic, majOp, isNegCond, 0>;
+  // Branch taken:
+  def _t_V4: NVJri_template<mnemonic, majOp, isNegCond, 1>;
+}
+
+multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
+  let BaseOpcode = BaseOp#_NVJri in {
+    defm _t_Jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True cond
+    defm _f_Jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond
+  }
+}
+
+// if ([!]cmp.eq(Ns.new,#U5)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,#U5)) jump:[n]t #r9:2
+// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2
+
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
+    Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
+  defm CMPEQri  : NVJri_base<"cmp.eq",  "CMPEQ",  0b000>, PredRel;
+  defm CMPGTri  : NVJri_base<"cmp.gt",  "CMPGT",  0b001>, PredRel;
+  defm CMPGTUri : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
+}
+
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jump instructions
+// with a register and a hardcoded 0/-1 immediate value.
+//===----------------------------------------------------------------------===// + +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11 in +class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal, + bit isNegCond, bit isTaken> + : NVInst_V4<(outs), + (ins IntRegs:$src1, brtarget:$offset), + "if ("#!if(isNegCond, "!","")#mnemonic + #"($src1.new, #"#ImmVal#")) jump:" + #!if(isTaken, "t","nt")#" $offset", + []>, Requires<[HasV4T]> { + + let isPredicatedFalse = isNegCond; + let isBrTaken = !if(isTaken, "true", "false"); + + bits<3> src1; + bits<11> offset; + let IClass = 0b0010; + let Inst{26} = 0b1; + let Inst{25-23} = majOp; + let Inst{22} = isNegCond; + let Inst{18-16} = src1; + let Inst{13} = isTaken; + let Inst{21-20} = offset{10-9}; + let Inst{7-1} = offset{8-2}; +} + +multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal, + bit isNegCond> { + // Branch not taken: + def _nt_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>; + // Branch taken: + def _t_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>; +} + +multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp, + string ImmVal> { + let BaseOpcode = BaseOp#_NVJ_ConstImm in { + defm _t_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True cond + defm _f_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False Cond + } +} + +// if ([!]tstbit(Ns.new,#0)) jump:[n]t #r9:2 +// if ([!]cmp.eq(Ns.new,#-1)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2 + +let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1, + Defs = [PC], neverHasSideEffects = 1 in { + defm TSTBIT0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel; + defm CMPEQn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel; + defm CMPGTn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel; +} + +//===----------------------------------------------------------------------===// +// XTYPE/ALU + +//===----------------------------------------------------------------------===// + +// Add and accumulate. +// Rd=add(Rs,add(Ru,#s6)) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 6, +validSubTargets = HasV4SubT in +def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, s6Ext:$src3), + "$dst = add($src1, add($src2, #$src3))", + [(set (i32 IntRegs:$dst), + (add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2), + s6_16ExtPred:$src3)))]>, + Requires<[HasV4T]>; + +// Rd=add(Rs,sub(#s6,Ru)) +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6, +validSubTargets = HasV4SubT in +def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3), + "$dst = add($src1, sub(#$src2, $src3))", + [(set (i32 IntRegs:$dst), + (add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2, + (i32 IntRegs:$src3))))]>, + Requires<[HasV4T]>; + +// Generates the same instruction as ADDr_SUBri_V4 but matches different +// pattern. +// Rd=add(Rs,sub(#s6,Ru)) +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6, +validSubTargets = HasV4SubT in +def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3), + "$dst = add($src1, sub(#$src2, $src3))", + [(set (i32 IntRegs:$dst), + (sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2), + (i32 IntRegs:$src3)))]>, + Requires<[HasV4T]>; + + +// Add or subtract doublewords with carry. 
+//TODO: +// Rdd=add(Rss,Rtt,Px):carry +//TODO: +// Rdd=sub(Rss,Rtt,Px):carry + + +// Logical doublewords. +// Rdd=and(Rtt,~Rss) +let validSubTargets = HasV4SubT in +def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2), + "$dst = and($src1, ~$src2)", + [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1), + (not (i64 DoubleRegs:$src2))))]>, + Requires<[HasV4T]>; + +// Rdd=or(Rtt,~Rss) +let validSubTargets = HasV4SubT in +def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2), + "$dst = or($src1, ~$src2)", + [(set (i64 DoubleRegs:$dst), + (or (i64 DoubleRegs:$src1), (not (i64 DoubleRegs:$src2))))]>, + Requires<[HasV4T]>; + + +// Logical-logical doublewords. +// Rxx^=xor(Rss,Rtt) +let validSubTargets = HasV4SubT in +def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), + "$dst ^= xor($src2, $src3)", + [(set (i64 DoubleRegs:$dst), + (xor (i64 DoubleRegs:$src1), (xor (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src3))))], + "$src1 = $dst">, + Requires<[HasV4T]>; + + +// Logical-logical words. +// Rx=or(Ru,and(Rx,#s10)) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10, +validSubTargets = HasV4SubT in +def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3), + "$dst = or($src1, and($src2, #$src3))", + [(set (i32 IntRegs:$dst), + (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + s10ExtPred:$src3)))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// Rx[&|^]=and(Rs,Rt) +// Rx&=and(Rs,Rt) +let validSubTargets = HasV4SubT in +def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), + "$dst &= and($src2, $src3)", + [(set (i32 IntRegs:$dst), + (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], + "$src1 = $dst">, + Requires<[HasV4T]>; + +// Rx|=and(Rs,Rt) +let validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "reg" in +def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), + "$dst |= and($src2, $src3)", + [(set (i32 IntRegs:$dst), + (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], + "$src1 = $dst">, + Requires<[HasV4T]>, ImmRegRel; + +// Rx^=and(Rs,Rt) +let validSubTargets = HasV4SubT in +def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), + "$dst ^= and($src2, $src3)", + [(set (i32 IntRegs:$dst), + (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], + "$src1 = $dst">, + Requires<[HasV4T]>; + +// Rx[&|^]=and(Rs,~Rt) +// Rx&=and(Rs,~Rt) +let validSubTargets = HasV4SubT in +def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), + "$dst &= and($src2, ~$src3)", + [(set (i32 IntRegs:$dst), + (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + (not (i32 IntRegs:$src3)))))], + "$src1 = $dst">, + Requires<[HasV4T]>; + +// Rx|=and(Rs,~Rt) +let validSubTargets = HasV4SubT in +def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), + "$dst |= and($src2, ~$src3)", + [(set (i32 IntRegs:$dst), + (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + (not (i32 IntRegs:$src3)))))], + "$src1 = $dst">, + Requires<[HasV4T]>; + +// Rx^=and(Rs,~Rt) +let validSubTargets = HasV4SubT in +def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, 
IntRegs: $src2, IntRegs:$src3),
+            "$dst ^= and($src2, ~$src3)",
+            [(set (i32 IntRegs:$dst),
+                  (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+                                                 (not (i32 IntRegs:$src3)))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx[&|^]=or(Rs,Rt)
+// Rx&=or(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst &= or($src2, $src3)",
+            [(set (i32 IntRegs:$dst),
+                  (and (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+                                                (i32 IntRegs:$src3))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=or(Rs,Rt)
+let validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "reg" in
+def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst |= or($src2, $src3)",
+            [(set (i32 IntRegs:$dst),
+                  (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+                                               (i32 IntRegs:$src3))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>, ImmRegRel;
+
+// Rx^=or(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst ^= or($src2, $src3)",
+            [(set (i32 IntRegs:$dst),
+                  (xor (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+                                                (i32 IntRegs:$src3))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx[&|^]=xor(Rs,Rt)
+// Rx&=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst &= xor($src2, $src3)",
+            [(set (i32 IntRegs:$dst),
+                  (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+                                                 (i32 IntRegs:$src3))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=xor(Rs,Rt)
+// Note: the DAG operator must be 'or' here to agree with the |= mnemonic.
+let validSubTargets = HasV4SubT in
+def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst |= xor($src2, $src3)",
+            [(set (i32 IntRegs:$dst),
+                  (or (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+                                                (i32 IntRegs:$src3))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx^=xor(Rs,Rt)
+// Note: the DAG operator must be 'xor' here to agree with the ^= mnemonic.
+let validSubTargets = HasV4SubT in
+def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst ^= xor($src2, $src3)",
+            [(set (i32 IntRegs:$dst),
+                  (xor (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+                                                 (i32 IntRegs:$src3))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=and(Rs,#s10)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "imm" in
+def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
+            "$dst |= and($src2, #$src3)",
+            [(set (i32 IntRegs:$dst),
+                  (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+                                                s10ExtPred:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>, ImmRegRel;
+
+// Rx|=or(Rs,#s10)
+// Note: the inner DAG operator must be 'or' to agree with the mnemonic.
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "imm" in
+def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
+            "$dst |= or($src2, #$src3)",
+            [(set (i32 IntRegs:$dst),
+                  (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+                                               s10ExtPred:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>, ImmRegRel;
+
+
+// Modulo wrap
+// Rd=modwrap(Rs,Rt)
+// Round
+// Rd=cround(Rs,#u5)
+// Rd=cround(Rs,Rt)
+// Rd=round(Rs,#u5)[:sat]
+// Rd=round(Rs,Rt)[:sat]
+// Vector reduce add unsigned halfwords
+// Rd=vraddh(Rss,Rtt)
+// Vector add bytes
+// Rdd=vaddb(Rss,Rtt)
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+// Rxx+=vrcnegh(Rss,Rt)
+// Vector maximum bytes
+// Rdd=vmaxb(Rtt,Rss)
+// Vector reduce maximum halfwords
+// Rxx=vrmaxh(Rss,Ru)
+// Rxx=vrmaxuh(Rss,Ru)
+// Vector reduce maximum words
+// Rxx=vrmaxuw(Rss,Ru)
+// Rxx=vrmaxw(Rss,Ru)
+// Vector minimum bytes
+// Rdd=vminb(Rtt,Rss)
+// Vector reduce minimum halfwords
+// Rxx=vrminh(Rss,Ru)
+// Rxx=vrminuh(Rss,Ru)
+// Vector reduce minimum words
+// Rxx=vrminuw(Rss,Ru)
+// Rxx=vrminw(Rss,Ru)
+// Vector subtract bytes
+// Rdd=vsubb(Rss,Rtt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY +
+//===----------------------------------------------------------------------===//
+
+// Multiply and use lower result.
+// Rd=add(#u6,mpyi(Rs,#U6))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT in
+def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst),
+            (ins u6Ext:$src1, IntRegs:$src2, u6Imm:$src3),
+            "$dst = add(#$src1, mpyi($src2, #$src3))",
+            [(set (i32 IntRegs:$dst),
+                  (add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
+                       u6ExtPred:$src1))]>,
+            Requires<[HasV4T]>;
+
+// Rd=add(##,mpyi(Rs,#U6))
+def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
+                (HexagonCONST32 tglobaladdr:$src1)),
+           (i32 (ADDi_MPYri_V4 tglobaladdr:$src1, IntRegs:$src2,
+                               u6ImmPred:$src3))>;
+
+// Rd=add(#u6,mpyi(Rs,Rt))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in
+def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst),
+            (ins u6Ext:$src1, IntRegs:$src2, IntRegs:$src3),
+            "$dst = add(#$src1, mpyi($src2, $src3))",
+            [(set (i32 IntRegs:$dst),
+                  (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+                       u6ExtPred:$src1))]>,
+            Requires<[HasV4T]>, ImmRegRel;
+
+// Rd=add(##,mpyi(Rs,Rt))
+def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+                (HexagonCONST32 tglobaladdr:$src1)),
+           (i32 (ADDi_MPYrr_V4 tglobaladdr:$src1, IntRegs:$src2,
+                               IntRegs:$src3))>;
+
+// Rd=add(Ru,mpyi(#u6:2,Rs))
+let validSubTargets = HasV4SubT in
+def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+            "$dst = add($src1, mpyi(#$src2, $src3))",
+            [(set (i32 IntRegs:$dst),
+                  (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3),
+                                                 u6_2ImmPred:$src2)))]>,
+            Requires<[HasV4T]>;
+
+// Rd=add(Ru,mpyi(Rs,#u6))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in
+def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs:$src2, u6Ext:$src3),
+            "$dst = add($src1, mpyi($src2, #$src3))",
+            [(set (i32 IntRegs:$dst),
+                  (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
+                                                 u6ExtPred:$src3)))]>,
+            Requires<[HasV4T]>, ImmRegRel;
+
+// Rx=add(Ru,mpyi(Rx,Rs))
+let validSubTargets = HasV4SubT, InputType = "reg", CextOpcode = "ADD_MPY" in
+def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+            "$dst = add($src1, mpyi($src2, $src3))",
+            [(set (i32 IntRegs:$dst),
+                  (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
+                                                 (i32 IntRegs:$src3))))],
+            "$src2 = $dst">,
+            Requires<[HasV4T]>, ImmRegRel;
+
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+//
Rxx^=pmpyw(Rs,Rt) + +// Vector reduce multiply word by signed half (32x16) +// Rdd=vrmpyweh(Rss,Rtt)[:<<1] +// Rdd=vrmpywoh(Rss,Rtt)[:<<1] +// Rxx+=vrmpyweh(Rss,Rtt)[:<<1] +// Rxx+=vrmpywoh(Rss,Rtt)[:<<1] + +// Multiply and use upper result +// Rd=mpy(Rs,Rt.H):<<1:sat +// Rd=mpy(Rs,Rt.L):<<1:sat +// Rd=mpy(Rs,Rt):<<1 +// Rd=mpy(Rs,Rt):<<1:sat +// Rd=mpysu(Rs,Rt) +// Rx+=mpy(Rs,Rt):<<1:sat +// Rx-=mpy(Rs,Rt):<<1:sat + +// Vector multiply bytes +// Rdd=vmpybsu(Rs,Rt) +// Rdd=vmpybu(Rs,Rt) +// Rxx+=vmpybsu(Rs,Rt) +// Rxx+=vmpybu(Rs,Rt) + +// Vector polynomial multiply halfwords +// Rdd=vpmpyh(Rs,Rt) +// Rxx^=vpmpyh(Rs,Rt) + +//===----------------------------------------------------------------------===// +// XTYPE/MPY - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// XTYPE/SHIFT + +//===----------------------------------------------------------------------===// + +// Shift by immediate and accumulate. +// Rx=add(#u8,asl(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in +def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = add(#$src1, asl($src2, #$src3))", + [(set (i32 IntRegs:$dst), + (add (shl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ExtPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// Rx=add(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in +def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = add(#$src1, lsr($src2, #$src3))", + [(set (i32 IntRegs:$dst), + (add (srl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ExtPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// Rx=sub(#u8,asl(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in +def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = sub(#$src1, asl($src2, #$src3))", + [(set (i32 IntRegs:$dst), + (sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ExtPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// Rx=sub(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in +def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = sub(#$src1, lsr($src2, #$src3))", + [(set (i32 IntRegs:$dst), + (sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ExtPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + + +//Shift by immediate and logical. 
+//Rx=and(#u8,asl(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in +def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = and(#$src1, asl($src2, #$src3))", + [(set (i32 IntRegs:$dst), + (and (shl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ExtPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +//Rx=and(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in +def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = and(#$src1, lsr($src2, #$src3))", + [(set (i32 IntRegs:$dst), + (and (srl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ExtPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +//Rx=or(#u8,asl(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +AddedComplexity = 30, validSubTargets = HasV4SubT in +def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = or(#$src1, asl($src2, #$src3))", + [(set (i32 IntRegs:$dst), + (or (shl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ExtPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +//Rx=or(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +AddedComplexity = 30, validSubTargets = HasV4SubT in +def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = or(#$src1, lsr($src2, #$src3))", + [(set (i32 IntRegs:$dst), + (or (srl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ExtPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + + +//Shift by register. +//Rd=lsl(#s6,Rt) +let validSubTargets = HasV4SubT in { +def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2), + "$dst = lsl(#$src1, $src2)", + [(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1, + (i32 IntRegs:$src2)))]>, + Requires<[HasV4T]>; + + +//Shift by register and logical. 
+//Rxx^=asl(Rss,Rt) +def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), + "$dst ^= asl($src2, $src3)", + [(set (i64 DoubleRegs:$dst), + (xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$src2), + (i32 IntRegs:$src3))))], + "$src1 = $dst">, + Requires<[HasV4T]>; + +//Rxx^=asr(Rss,Rt) +def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), + "$dst ^= asr($src2, $src3)", + [(set (i64 DoubleRegs:$dst), + (xor (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$src2), + (i32 IntRegs:$src3))))], + "$src1 = $dst">, + Requires<[HasV4T]>; + +//Rxx^=lsl(Rss,Rt) +def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), + "$dst ^= lsl($src2, $src3)", + [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1), + (shl (i64 DoubleRegs:$src2), + (i32 IntRegs:$src3))))], + "$src1 = $dst">, + Requires<[HasV4T]>; + +//Rxx^=lsr(Rss,Rt) +def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), + "$dst ^= lsr($src2, $src3)", + [(set (i64 DoubleRegs:$dst), + (xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$src2), + (i32 IntRegs:$src3))))], + "$src1 = $dst">, + Requires<[HasV4T]>; +} + +//===----------------------------------------------------------------------===// +// XTYPE/SHIFT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MEMOP: Word, Half, Byte +//===----------------------------------------------------------------------===// + +def MEMOPIMM : SDNodeXForm<imm, [{ + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int32_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm); +}]>; + +def MEMOPIMM_HALF : SDNodeXForm<imm, [{ + // -1 .. -31 represented as 65535..65515 + // assigning to a short restores our desired signed value. + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int16_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm); +}]>; + +def MEMOPIMM_BYTE : SDNodeXForm<imm, [{ + // -1 .. -31 represented as 255..235 + // assigning to a char restores our desired signed value. + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int8_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm); +}]>; + +def SETMEMIMM : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-31]. + // As an SDNode. + int32_t imm = N->getSExtValue(); + return XformMskToBitPosU5Imm(imm); +}]>; + +def CLRMEMIMM : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-31]. + // As an SDNode. + // we bit negate the value first + int32_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU5Imm(imm); +}]>; + +def SETMEMIMM_SHORT : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-15]. + // As an SDNode. + int16_t imm = N->getSExtValue(); + return XformMskToBitPosU4Imm(imm); +}]>; + +def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-15]. + // As an SDNode. + // we bit negate the value first + int16_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU4Imm(imm); +}]>; + +def SETMEMIMM_BYTE : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-7]. + // As an SDNode. 
+  int8_t imm =  N->getSExtValue();
+  return XformMskToBitPosU3Imm(imm);
+}]>;
+
+def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{
+  // Return the bit position we will clear [0-7].
+  // As an SDNode.
+  // we bit negate the value first
+  int8_t imm = ~(N->getSExtValue());
+  return XformMskToBitPosU3Imm(imm);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Template class for MemOp instructions with the register value.
+//===----------------------------------------------------------------------===//
+class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp,
+                     string memOp, bits<2> memOpBits> :
+      MEMInst_V4<(outs),
+                 (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta),
+                 opc#"($base+#$offset)"#memOp#"$delta",
+                 []>,
+                 Requires<[HasV4T, UseMEMOP]> {
+
+  bits<5> base;
+  bits<5> delta;
+  bits<32> offset;
+  bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
+
+  let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+                   !if (!eq(opcBits, 0b01), offset{6-1},
+                   !if (!eq(opcBits, 0b10), offset{7-2},0)));
+
+  let IClass = 0b0011;
+  let Inst{27-24} = 0b1110;
+  let Inst{22-21} = opcBits;
+  let Inst{20-16} = base;
+  let Inst{13} = 0b0;
+  let Inst{12-7} = offsetBits;
+  let Inst{6-5} = memOpBits;
+  let Inst{4-0} = delta;
+}
+
+//===----------------------------------------------------------------------===//
+// Template class for MemOp instructions with the immediate value.
+//===----------------------------------------------------------------------===//
+class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
+                     string memOp, bits<2> memOpBits> :
+      MEMInst_V4 <(outs),
+                  (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta),
+                  opc#"($base+#$offset)"#memOp#"#$delta"
+                  #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')'
+                  []>,
+                  Requires<[HasV4T, UseMEMOP]> {
+
+  bits<5> base;
+  bits<5> delta;
+  bits<32> offset;
+  bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
+
+  let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+                   !if (!eq(opcBits, 0b01), offset{6-1},
+                   !if (!eq(opcBits, 0b10), offset{7-2},0)));
+
+  let IClass = 0b0011;
+  let Inst{27-24} = 0b1111;
+  let Inst{22-21} = opcBits;
+  let Inst{20-16} = base;
+  let Inst{13} = 0b0;
+  let Inst{12-7} = offsetBits;
+  let Inst{6-5} = memOpBits;
+  let Inst{4-0} = delta;
+}
+
+// multiclass to define MemOp instructions with a register operand.
+multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> {
+  def _ADD#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add
+  def _SUB#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub
+  def _AND#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and
+  def _OR#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or
+}
+
+// multiclass to define MemOp instructions with an immediate operand.
+multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> {
+  def _ADD#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >;
+  def _SUB#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >;
+  def _CLRBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =clrbit(", 0b10>;
+  def _SETBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =setbit(", 0b11>;
+}
+
+multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> {
+  defm r : MemOp_rr <opc, opcBits, ImmOp>;
+  defm i : MemOp_ri <opc, opcBits, ImmOp>;
+}
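The MemOp classes above encode read-modify-write operations performed directly on memory. A minimal C sketch of a source pattern that can select the immediate-add form when memops are enabled (not part of the patch; register and offset illustrative):

    /* can select:  memw(r0+#0) += #1                                    */
    void bump(int *counter) {
        *counter += 1;
    }

+// Define MemOp instructions.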
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, +validSubTargets =HasV4SubT in { + let opExtentBits = 6, accessSize = ByteAccess in + defm MemOPb : MemOp_base <"memb", 0b00, u6_0Ext>; + + let opExtentBits = 7, accessSize = HalfWordAccess in + defm MemOPh : MemOp_base <"memh", 0b01, u6_1Ext>; + + let opExtentBits = 8, accessSize = WordAccess in + defm MemOPw : MemOp_base <"memw", 0b10, u6_2Ext>; +} + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for ALU operations on the memory +// Here value used for the ALU operation is an immediate value. +// mem[bh](Rs+#0) += #U5 +// mem[bh](Rs+#u6) += #U5 +//===----------------------------------------------------------------------===// + +multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, + InstHexagon MI, SDNode OpNode> { + let AddedComplexity = 180 in + def : Pat < (stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend), + IntRegs:$addr), + (MI IntRegs:$addr, #0, u5ImmPred:$addend )>; + + let AddedComplexity = 190 in + def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)), + u5ImmPred:$addend), + (add IntRegs:$base, ExtPred:$offset)), + (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>; +} + +multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, + InstHexagon addMI, InstHexagon subMI> { + defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>; + defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>; +} + +multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred, + MemOPh_ADDi_V4, MemOPh_SUBi_V4>; + // Byte + defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred, + MemOPb_ADDi_V4, MemOPb_SUBi_V4>; +} + +let Predicates = [HasV4T, UseMEMOP] in { + defm : MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend + defm : MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend + defm : MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend + + // Word + defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, MemOPw_ADDi_V4, + MemOPw_SUBi_V4>; +} + +//===----------------------------------------------------------------------===// +// multiclass to define 'Def Pats' for ALU operations on the memory. +// Here value used for the ALU operation is a negative value. 
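+// (A negative m5 addend is rewritten to its positive counterpart by the
+// MEMOPIMM transforms above and emitted as a memop subtract; roughly, C code
+// like "*p += -3" becomes "memb(Rs+#0) -= #3".)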
+// mem[bh](Rs+#0) += #m5 +// mem[bh](Rs+#u6) += #m5 +//===----------------------------------------------------------------------===// + +multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, + PatLeaf immPred, ComplexPattern addrPred, + SDNodeXForm xformFunc, InstHexagon MI> { + let AddedComplexity = 190 in + def : Pat <(stOp (add (ldOp IntRegs:$addr), immPred:$subend), + IntRegs:$addr), + (MI IntRegs:$addr, #0, (xformFunc immPred:$subend) )>; + + let AddedComplexity = 195 in + def : Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)), + immPred:$subend), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>; +} + +multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred, + ADDRriU6_1, MEMOPIMM_HALF, MemOPh_SUBi_V4>; + // Byte + defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred, + ADDRriU6_0, MEMOPIMM_BYTE, MemOPb_SUBi_V4>; +} + +let Predicates = [HasV4T, UseMEMOP] in { + defm : MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend + defm : MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend + defm : MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend + + // Word + defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred, + ADDRriU6_2, MEMOPIMM, MemOPw_SUBi_V4>; +} + +//===----------------------------------------------------------------------===// +// Multiclass to define 'def Pats' for bit operations on the memory. +// mem[bhw](Rs+#0) = [clrbit|setbit](#U5) +// mem[bhw](Rs+#u6) = [clrbit|setbit](#U5) +//===----------------------------------------------------------------------===// + +multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred, + PatLeaf extPred, ComplexPattern addrPred, + SDNodeXForm xformFunc, InstHexagon MI, SDNode OpNode> { + + // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5) + let AddedComplexity = 250 in + def : Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + immPred:$bitend), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>; + + // mem[bhw](Rs+#0) = [clrbit|setbit](#U5) + let AddedComplexity = 225 in + def : Pat <(stOp (OpNode (ldOp addrPred:$addr), immPred:$bitend), + addrPred:$addr), + (MI IntRegs:$addr, #0, (xformFunc immPred:$bitend))>; +} + +multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Byte - clrbit + defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred, + ADDRriU6_0, CLRMEMIMM_BYTE, MemOPb_CLRBITi_V4, and>; + // Byte - setbit + defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred, + ADDRriU6_0, SETMEMIMM_BYTE, MemOPb_SETBITi_V4, or>; + // Half Word - clrbit + defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred, + ADDRriU6_1, CLRMEMIMM_SHORT, MemOPh_CLRBITi_V4, and>; + // Half Word - setbit + defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred, + ADDRriU6_1, SETMEMIMM_SHORT, MemOPh_SETBITi_V4, or>; +} + +let Predicates = [HasV4T, UseMEMOP] in { + // mem[bh](Rs+#0) = [clrbit|setbit](#U5) + // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5) + defm : MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend + defm : MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend + defm : MemOpi_bitExtType<extloadi8, extloadi16>; // any extend + + // memw(Rs+#0) = [clrbit|setbit](#U5) + // memw(Rs+#u6:2) = [clrbit|setbit](#U5) + defm : MemOpi_bitPats<load, store, Clr5ImmPred, 
u6_2ExtPred, ADDRriU6_2,
+ CLRMEMIMM, MemOPw_CLRBITi_V4, and>;
+ defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2,
+ SETMEMIMM, MemOPw_SETBITi_V4, or>;
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for ALU operations on the memory
+// where addend is a register.
+// mem[bhw](Rs+#0) [+-&|]= Rt
+// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred,
+ PatLeaf extPred, InstHexagon MI, SDNode OpNode> {
+ let AddedComplexity = 141 in
+ // mem[bhw](Rs+#0) [+-&|]= Rt
+ def : Pat <(stOp (OpNode (ldOp addrPred:$addr), (i32 IntRegs:$addend)),
+ addrPred:$addr),
+ (MI IntRegs:$addr, #0, (i32 IntRegs:$addend) )>;
+
+ // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+ let AddedComplexity = 150 in
+ def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend) )>;
+}
+
+multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp,
+ ComplexPattern addrPred, PatLeaf extPred,
+ InstHexagon addMI, InstHexagon subMI,
+ InstHexagon andMI, InstHexagon orMI > {
+
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, addMI, add>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, subMI, sub>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, andMI, and>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, orMI, or>;
+}
+
+multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred,
+ MemOPh_ADDr_V4, MemOPh_SUBr_V4,
+ MemOPh_ANDr_V4, MemOPh_ORr_V4>;
+ // Byte
+ defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred,
+ MemOPb_ADDr_V4, MemOPb_SUBr_V4,
+ MemOPb_ANDr_V4, MemOPb_ORr_V4>;
+}
+
+// Define 'def Pats' for MemOps with register addend.
+let Predicates = [HasV4T, UseMEMOP] in {
+ // Byte, Half Word
+ defm : MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOPr_ExtType<extloadi8, extloadi16>; // any extend
+ // Word
+ defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, MemOPw_ADDr_V4,
+ MemOPw_SUBr_V4, MemOPw_ANDr_V4, MemOPw_ORr_V4 >;
+}
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED +
+//===----------------------------------------------------------------------===//
+
+// Hexagon V4 only supports these flavors of byte/half compare instructions:
+// EQ/GT/GTU. Other flavors like GE/GEU/LT/LTU/LE/LEU are not supported by
+// the hardware. However, the compiler can still implement them by combining
+// the patterns that are implemented.
+// The implemented patterns are: EQ/GT/GTU.
+// Missing patterns are: GE/GEU/LT/LTU/LE/LEU.
+
+// The following instruction is not being extended, as extension results in
+// incorrect code for negative numbers.
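+// (The pattern below matches (Rs & 255) == #u8; a constant-extended
+// immediate could lie outside 0..255 and would compare incorrectly.)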
+// Pd=cmpb.eq(Rs,#u8) + +// p=!cmp.eq(r1,r2) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = !cmp.eq($src1, $src2)", + [(set (i1 PredRegs:$dst), + (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +// p=!cmp.eq(r1,#s10) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotEQ_ri : ALU32_ri<(outs PredRegs:$dst), + (ins IntRegs:$src1, s10Ext:$src2), + "$dst = !cmp.eq($src1, #$src2)", + [(set (i1 PredRegs:$dst), + (setne (i32 IntRegs:$src1), s10ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// p=!cmp.gt(r1,r2) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = !cmp.gt($src1, $src2)", + [(set (i1 PredRegs:$dst), + (not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>, + Requires<[HasV4T]>; + +// p=!cmp.gt(r1,#s10) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotGT_ri : ALU32_ri<(outs PredRegs:$dst), + (ins IntRegs:$src1, s10Ext:$src2), + "$dst = !cmp.gt($src1, #$src2)", + [(set (i1 PredRegs:$dst), + (not (setgt (i32 IntRegs:$src1), s10ImmPred:$src2)))]>, + Requires<[HasV4T]>; + +// p=!cmp.gtu(r1,r2) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = !cmp.gtu($src1, $src2)", + [(set (i1 PredRegs:$dst), + (not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>, + Requires<[HasV4T]>; + +// p=!cmp.gtu(r1,#u9) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotGTU_ri : ALU32_ri<(outs PredRegs:$dst), + (ins IntRegs:$src1, u9Ext:$src2), + "$dst = !cmp.gtu($src1, #$src2)", + [(set (i1 PredRegs:$dst), + (not (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)))]>, + Requires<[HasV4T]>; + +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, u8Imm:$src2), + "$dst = cmpb.eq($src1, #$src2)", + [(set (i1 PredRegs:$dst), + (seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>, + Requires<[HasV4T]>; + +def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)), + bb:$offset), + (JMP_f (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2), + bb:$offset)>, + Requires<[HasV4T]>; + +// Pd=cmpb.eq(Rs,Rt) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmpb.eq($src1, $src2)", + [(set (i1 PredRegs:$dst), + (seteq (and (xor (i32 IntRegs:$src1), + (i32 IntRegs:$src2)), 255), 0))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.eq(Rs,Rt) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmpb.eq($src1, $src2)", + [(set (i1 PredRegs:$dst), + (seteq (shl (i32 IntRegs:$src1), (i32 24)), + (shl (i32 IntRegs:$src2), (i32 24))))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.gt(Rs,Rt) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmpb.gt($src1, $src2)", + [(set (i1 PredRegs:$dst), + (setgt (shl (i32 IntRegs:$src1), (i32 24)), + (shl (i32 IntRegs:$src2), (i32 24))))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.gtu(Rs,#u7) +let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7, +isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", InputType = "imm" in +def CMPbGTUri_V4 : 
MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, u7Ext:$src2), + "$dst = cmpb.gtu($src1, #$src2)", + [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255), + u7ExtPred:$src2))]>, + Requires<[HasV4T]>, ImmRegRel; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_BYTE : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformU7ToU7M1Imm(imm); +}]>; + +// For the sequence +// zext( seteq ( and(Rs, 255), u8)) +// Generate +// Pd=cmpb.eq(Rs, #u8) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)), + u8ExtPred:$u8)))), + (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs), + (u8ExtPred:$u8))), + 1, 0))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setne ( and(Rs, 255), u8)) +// Generate +// Pd=cmpb.eq(Rs, #u8) +// if (Pd.new) Rd=#0 +// if (!Pd.new) Rd=#1 +def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)), + u8ExtPred:$u8)))), + (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs), + (u8ExtPred:$u8))), + 0, 1))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( seteq (Rs, and(Rt, 255))) +// Generate +// Pd=cmpb.eq(Rs, Rt) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt), + (i32 (and (i32 IntRegs:$Rs), 255)))))), + (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs), + (i32 IntRegs:$Rt))), + 1, 0))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setne (Rs, and(Rt, 255))) +// Generate +// Pd=cmpb.eq(Rs, Rt) +// if (Pd.new) Rd=#0 +// if (!Pd.new) Rd=#1 +def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt), + (i32 (and (i32 IntRegs:$Rs), 255)))))), + (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs), + (i32 IntRegs:$Rt))), + 0, 1))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setugt ( and(Rs, 255), u8)) +// Generate +// Pd=cmpb.gtu(Rs, #u8) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)), + u8ExtPred:$u8)))), + (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs), + (u8ExtPred:$u8))), + 1, 0))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setugt ( and(Rs, 254), u8)) +// Generate +// Pd=cmpb.gtu(Rs, #u8) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)), + u8ExtPred:$u8)))), + (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs), + (u8ExtPred:$u8))), + 1, 0))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setult ( Rs, Rt)) +// Generate +// Pd=cmp.ltu(Rs, Rt) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs) +def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt), + (i32 IntRegs:$Rs))), + 1, 0))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setlt ( Rs, Rt)) +// Generate +// Pd=cmp.lt(Rs, Rt) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs) +def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt), + (i32 IntRegs:$Rs))), + 1, 0))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setugt ( Rs, Rt)) +// Generate +// Pd=cmp.gtu(Rs, Rt) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs), + (i32 IntRegs:$Rt))), + 1, 0))>, 
+ Requires<[HasV4T]>;
+
+// This pattern interferes with CoreMark performance, so it is not
+// implemented at this time.
+// For the sequence
+// zext( setgt ( Rs, Rt))
+// Generate
+// Pd=cmp.gt(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+
+// For the sequence
+// zext( setuge ( Rs, Rt))
+// Generate
+// Pd=cmp.ltu(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setge ( Rs, Rt))
+// Generate
+// Pd=cmp.lt(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setule ( Rs, Rt))
+// Generate
+// Pd=cmp.gtu(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setle ( Rs, Rt))
+// Generate
+// Pd=cmp.gt(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setult ( and(Rs, 255), u8))
+// Use the isdigit transformation below
+
+// Generate code of the form 'mux_ii(cmpbgtu(Rdd, C-1),0,1)'
+// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;.
+// The isdigit transformation relies on two 'clever' aspects:
+// 1) The data type is unsigned, which allows us to eliminate a zero test
+// after biasing the expression by 48. We depend on the representation and
+// semantics of the unsigned types.
+// 2) The front end has converted <= 9 into < 10 on entry to LLVM.
+//
+// For the C code:
+// retval = ((c>='0') & (c<='9')) ? 1 : 0;
+// The code is transformed upstream of LLVM into
+// retval = (c-48) < 10 ? 1 : 0;
+let AddedComplexity = 139 in
+def : Pat <(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
+ u7StrictPosImmPred:$src2)))),
+ (i32 (MUX_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1),
+ (DEC_CONST_BYTE u7StrictPosImmPred:$src2))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,Rt)
+let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU",
+InputType = "reg" in
+def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gtu($src1, $src2)",
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
+ (and (i32 IntRegs:$src2), 255)))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// The following instruction is not being extended, as extension results in
+// incorrect code for negative numbers.
+
+// Signed half compare(.eq) ri.
+// Pd=cmph.eq(Rs,#s8)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = cmph.eq($src1, #$src2)",
+ [(set (i1 PredRegs:$dst), (seteq (and (i32 IntRegs:$src1), 65535),
+ s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 1: xor + and, then compare: +// r0=xor(r0,r1) +// r0=and(r0,#0xffff) +// p0=cmp.eq(r0,#0) +// Pd=cmph.eq(Rs,Rt) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmph.eq($src1, $src2)", + [(set (i1 PredRegs:$dst), (seteq (and (xor (i32 IntRegs:$src1), + (i32 IntRegs:$src2)), + 65535), 0))]>, + Requires<[HasV4T]>; + +// Signed half compare(.eq) rr. +// Case 2: shift left 16 bits then compare: +// r0=asl(r0,16) +// r1=asl(r1,16) +// p0=cmp.eq(r0,r1) +// Pd=cmph.eq(Rs,Rt) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmph.eq($src1, $src2)", + [(set (i1 PredRegs:$dst), + (seteq (shl (i32 IntRegs:$src1), (i32 16)), + (shl (i32 IntRegs:$src2), (i32 16))))]>, + Requires<[HasV4T]>; + +/* Incorrect Pattern -- immediate should be right shifted before being +used in the cmph.gt instruction. +// Signed half compare(.gt) ri. +// Pd=cmph.gt(Rs,#s8) + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8, +isCompare = 1, validSubTargets = HasV4SubT in +def CMPhGTri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, s8Ext:$src2), + "$dst = cmph.gt($src1, #$src2)", + [(set (i1 PredRegs:$dst), + (setgt (shl (i32 IntRegs:$src1), (i32 16)), + s8ExtPred:$src2))]>, + Requires<[HasV4T]>; +*/ + +// Signed half compare(.gt) rr. +// Pd=cmph.gt(Rs,Rt) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmph.gt($src1, $src2)", + [(set (i1 PredRegs:$dst), + (setgt (shl (i32 IntRegs:$src1), (i32 16)), + (shl (i32 IntRegs:$src2), (i32 16))))]>, + Requires<[HasV4T]>; + +// Unsigned half compare rr (.gtu). +// Pd=cmph.gtu(Rs,Rt) +let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU", +InputType = "reg" in +def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmph.gtu($src1, $src2)", + [(set (i1 PredRegs:$dst), + (setugt (and (i32 IntRegs:$src1), 65535), + (and (i32 IntRegs:$src2), 65535)))]>, + Requires<[HasV4T]>, ImmRegRel; + +// Unsigned half compare ri (.gtu). +// Pd=cmph.gtu(Rs,#u7) +let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7, +isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU", +InputType = "imm" in +def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, u7Ext:$src2), + "$dst = cmph.gtu($src1, #$src2)", + [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535), + u7ExtPred:$src2))]>, + Requires<[HasV4T]>, ImmRegRel; + +let validSubTargets = HasV4SubT in +def NTSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = !tstbit($src1, $src2)", + [(set (i1 PredRegs:$dst), + (seteq (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>, + Requires<[HasV4T]>; + +let validSubTargets = HasV4SubT in +def NTSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + "$dst = !tstbit($src1, $src2)", + [(set (i1 PredRegs:$dst), + (seteq (and (shl 1, u5ImmPred:$src2), (i32 IntRegs:$src1)), 0))]>, + Requires<[HasV4T]>; + +//===----------------------------------------------------------------------===// +// XTYPE/PRED - +//===----------------------------------------------------------------------===// + +//Deallocate frame and return. 
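+// (dealloc_return undoes allocframe: it reloads FP/LR from the frame,
+// releases the stack frame and returns, all in one instruction, which is
+// why the definitions below clobber R29-R31 and PC.)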
+// dealloc_return
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1 in {
+let validSubTargets = HasV4SubT in
+ def DEALLOC_RET_V4 : LD0Inst<(outs), (ins),
+ "dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Restore registers and dealloc return function call.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC] in {
+let validSubTargets = HasV4SubT in
+ def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs),
+ (ins calltarget:$dst),
+ "jump $dst",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Restore registers and dealloc frame before a tail call.
+let isCall = 1, isBarrier = 1,
+ Defs = [R29, R30, R31, PC] in {
+let validSubTargets = HasV4SubT in
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs),
+ (ins calltarget:$dst),
+ "call $dst",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Save registers function call.
+let isCall = 1, isBarrier = 1,
+ Uses = [R29, R31] in {
+ def SAVE_REGISTERS_CALL_V4 : JInst<(outs),
+ (ins calltarget:$dst),
+ "call $dst // Save_callee_saved_registers",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+let validSubTargets = HasV4SubT in
+ def DEALLOC_RET_cPt_V4 : LD0Inst<(outs),
+ (ins PredRegs:$src1),
+ "if ($src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1,
+ isPredicated = 1, isPredicatedFalse = 1 in {
+let validSubTargets = HasV4SubT in
+ def DEALLOC_RET_cNotPt_V4 : LD0Inst<(outs), (ins PredRegs:$src1),
+ "if (!$src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+let validSubTargets = HasV4SubT in
+ def DEALLOC_RET_cdnPnt_V4 : LD0Inst<(outs), (ins PredRegs:$src1),
+ "if ($src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1,
+ isPredicated = 1, isPredicatedFalse = 1 in {
+let validSubTargets = HasV4SubT in
+ def DEALLOC_RET_cNotdnPnt_V4 : LD0Inst<(outs), (ins PredRegs:$src1),
+ "if (!$src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+let validSubTargets = HasV4SubT in
+ def DEALLOC_RET_cdnPt_V4 : LD0Inst<(outs), (ins PredRegs:$src1),
+ "if ($src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1,
+ isPredicated = 1, isPredicatedFalse = 1 in {
+let validSubTargets = HasV4SubT in
+ def DEALLOC_RET_cNotdnPt_V4 : LD0Inst<(outs), (ins PredRegs:$src1),
+ "if (!$src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Load/Store with absolute addressing mode
+// memw(#u6)=Rt
+
+multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let isPredicatedNew = isPredNew in
+ def NAME#_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$absaddr, RC: $src2),
+ !if(isNot, "if (!$src1", "if 
($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"(##$absaddr) = $src2", + []>, + Requires<[HasV4T]>; +} + +multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>; + // Predicate new + defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>; + } +} + +let isNVStorable = 1, isExtended = 1, neverHasSideEffects = 1 in +multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 0, isPredicable = 1 in + def NAME#_V4 : STInst2<(outs), + (ins u0AlwaysExt:$absaddr, RC:$src), + mnemonic#"(##$absaddr) = $src", + []>, + Requires<[HasV4T]>; + + let opExtendable = 1, isPredicated = 1 in { + defm Pt : ST_Abs_Pred<mnemonic, RC, 0>; + defm NotPt : ST_Abs_Pred<mnemonic, RC, 1>; + } + } +} + +multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, u0AlwaysExt:$absaddr, RC: $src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"(##$absaddr) = $src2.new", + []>, + Requires<[HasV4T]>; +} + +multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>; + // Predicate new + defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>; + } +} + +let mayStore = 1, isNVStore = 1, isExtended = 1, neverHasSideEffects = 1 in +multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 0, isPredicable = 1 in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins u0AlwaysExt:$absaddr, RC:$src), + mnemonic#"(##$absaddr) = $src.new", + []>, + Requires<[HasV4T]>; + + let opExtendable = 1, isPredicated = 1 in { + defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>; + defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>; + } + } +} + +let addrMode = Absolute in { + let accessSize = ByteAccess in + defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>, + ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel; + + let accessSize = HalfWordAccess in + defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>, + ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel; + + let accessSize = WordAccess in + defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>, + ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel; + + let accessSize = DoubleWordAccess, isNVStorable = 0 in + defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel; +} + +let Predicates = [HasV4T], AddedComplexity = 30 in { +def : Pat<(truncstorei8 (i32 IntRegs:$src1), + (HexagonCONST32 tglobaladdr:$absaddr)), + (STrib_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; + +def : Pat<(truncstorei16 (i32 IntRegs:$src1), + (HexagonCONST32 tglobaladdr:$absaddr)), + (STrih_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; + +def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)), + (STriw_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; + +def : Pat<(store (i64 DoubleRegs:$src1), + (HexagonCONST32 tglobaladdr:$absaddr)), + (STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>; +} + +//===----------------------------------------------------------------------===// +// multiclass for store instructions with GP-relative addressing mode. 
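+// For example (illustrative): "memw(#g) = Rt" for the unpredicated
+// GP-relative form, and "if (Pv) memw(##g) = Rt" once predicated.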
+// mem[bhwd](#global)=Rt +// if ([!]Pv[.new]) mem[bhwd](##global) = Rt +//===----------------------------------------------------------------------===// +let mayStore = 1, isNVStorable = 1 in +multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> { + let BaseOpcode = BaseOp, isPredicable = 1 in + def NAME#_V4 : STInst2<(outs), + (ins globaladdress:$global, RC:$src), + mnemonic#"(#$global) = $src", + []>; + + // When GP-relative instructions are predicated, their addressing mode is + // changed to absolute and they are always constant extended. + let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1, + isPredicated = 1 in { + defm Pt : ST_Abs_Pred <mnemonic, RC, 0>; + defm NotPt : ST_Abs_Pred <mnemonic, RC, 1>; + } +} + +let mayStore = 1, isNVStore = 1 in +multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> { + let BaseOpcode = BaseOp, isPredicable = 1 in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins u0AlwaysExt:$global, RC:$src), + mnemonic#"(#$global) = $src.new", + []>, + Requires<[HasV4T]>; + + // When GP-relative instructions are predicated, their addressing mode is + // changed to absolute and they are always constant extended. + let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1, + isPredicated = 1 in { + defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>; + defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>; + } +} + +let validSubTargets = HasV4SubT, neverHasSideEffects = 1 in { + let isNVStorable = 0 in + defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>, PredNewRel; + + defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>, + ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel; + defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>, + ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel; + defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>, + ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel; +} + +// 64 bit atomic store +def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global), + (i64 DoubleRegs:$src1)), + (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress) -> memd(#foo) +let AddedComplexity = 100 in +def : Pat <(store (i64 DoubleRegs:$src1), + (HexagonCONST32_GP tglobaladdr:$global)), + (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>; + +// 8 bit atomic store +def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global), + (i32 IntRegs:$src1)), + (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// Map from store(globaladdress) -> memb(#foo) +let AddedComplexity = 100 in +def : Pat<(truncstorei8 (i32 IntRegs:$src1), + (HexagonCONST32_GP tglobaladdr:$global)), + (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" +// to "r0 = 1; memw(#foo) = r0" +let AddedComplexity = 100 in +def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), + (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>; + +def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global), + (i32 IntRegs:$src1)), + (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// Map from store(globaladdress) -> memh(#foo) +let AddedComplexity = 100 in +def : Pat<(truncstorei16 (i32 IntRegs:$src1), + (HexagonCONST32_GP tglobaladdr:$global)), + (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// 32 bit atomic store +def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global), + (i32 IntRegs:$src1)), + (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// Map from store(globaladdress) -> memw(#foo) +let AddedComplexity = 100 in +def 
: Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), + (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +//===----------------------------------------------------------------------===// +// Multiclass for the load instructions with absolute addressing mode. +//===----------------------------------------------------------------------===// +multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : LDInst2<(outs RC:$dst), + (ins PredRegs:$src1, u0AlwaysExt:$absaddr), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"(##$absaddr)", + []>, + Requires<[HasV4T]>; +} + +multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>; + // Predicate new + defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>; + } +} + +let isExtended = 1, neverHasSideEffects = 1 in +multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 1, isPredicable = 1 in + def NAME#_V4 : LDInst2<(outs RC:$dst), + (ins u0AlwaysExt:$absaddr), + "$dst = "#mnemonic#"(##$absaddr)", + []>, + Requires<[HasV4T]>; + + let opExtendable = 2, isPredicated = 1 in { + defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>; + defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>; + } + } +} + +let addrMode = Absolute in { + let accessSize = ByteAccess in { + defm LDrib_abs : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel; + defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel; + } + let accessSize = HalfWordAccess in { + defm LDrih_abs : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel; + defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel; + } + let accessSize = WordAccess in + defm LDriw_abs : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel; + + let accessSize = DoubleWordAccess in + defm LDrid_abs : LD_Abs<"memd", "LDrid", DoubleRegs>, AddrModeRel; +} + +let Predicates = [HasV4T], AddedComplexity = 30 in { +def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))), + (LDriw_abs_V4 tglobaladdr: $absaddr)>; + +def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))), + (LDrib_abs_V4 tglobaladdr:$absaddr)>; + +def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))), + (LDriub_abs_V4 tglobaladdr:$absaddr)>; + +def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), + (LDrih_abs_V4 tglobaladdr:$absaddr)>; + +def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), + (LDriuh_abs_V4 tglobaladdr:$absaddr)>; +} + +//===----------------------------------------------------------------------===// +// multiclass for load instructions with GP-relative addressing mode. 
+// Rx=mem[bhwd](##global) +// if ([!]Pv[.new]) Rx=mem[bhwd](##global) +//===----------------------------------------------------------------------===// +let neverHasSideEffects = 1, validSubTargets = HasV4SubT in +multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> { + let BaseOpcode = BaseOp in { + let isPredicable = 1 in + def NAME#_V4 : LDInst2<(outs RC:$dst), + (ins globaladdress:$global), + "$dst = "#mnemonic#"(#$global)", + []>; + + let isExtended = 1, opExtendable = 2, isPredicated = 1 in { + defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>; + defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>; + } + } +} + +defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>, PredNewRel; +defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>, PredNewRel; +defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>, PredNewRel; +defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>, PredNewRel; +defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>, PredNewRel; +defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>, PredNewRel; + +def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), + (i64 (LDd_GP_V4 tglobaladdr:$global))>; + +def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDw_GP_V4 tglobaladdr:$global))>; + +def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDuh_GP_V4 tglobaladdr:$global))>; + +def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDub_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memw(#foo + 0) +let AddedComplexity = 100 in +def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i64 (LDd_GP_V4 tglobaladdr:$global))>; + +// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd +let AddedComplexity = 100 in +def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>; + +// When the Interprocedural Global Variable optimizer realizes that a certain +// global variable takes only two constant values, it shrinks the global to +// a boolean. Catch those loads here in the following 3 patterns. 
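+// (extloadi1 and sextloadi1 are served by the sign-extending byte load memb;
+// zextloadi1 must use memub so the upper bits are cleared.)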
+let AddedComplexity = 100 in +def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>; + +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memb(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memb(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>; + +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDub_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memub(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDub_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memh(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDh_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memh(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDh_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memuh(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDuh_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memw(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDw_GP_V4 tglobaladdr:$global))>; + + +// Transfer global address into a register +let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1, +isAsCheapAsAMove = 1, isReMaterializable = 1, validSubTargets = HasV4SubT in +def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), + "$dst = #$src1", + [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>, + Requires<[HasV4T]>; + +// Transfer a block address into a register +def : Pat<(HexagonCONST32_GP tblockaddress:$src1), + (TFRI_V4 tblockaddress:$src1)>, + Requires<[HasV4T]>; + +let isExtended = 1, opExtendable = 2, AddedComplexity=50, +neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in +def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, s16Ext:$src2), + "if($src1) $dst = #$src2", + []>, + Requires<[HasV4T]>; + +let isExtended = 1, opExtendable = 2, AddedComplexity=50, isPredicatedFalse = 1, +neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in +def TFRI_cNotPt_V4 : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, s16Ext:$src2), + "if(!$src1) $dst = #$src2", + []>, + Requires<[HasV4T]>; + +let isExtended = 1, opExtendable = 2, AddedComplexity=50, +neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in +def TFRI_cdnPt_V4 : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, s16Ext:$src2), + "if($src1.new) $dst = #$src2", + []>, + Requires<[HasV4T]>; + +let isExtended = 1, opExtendable = 2, AddedComplexity=50, isPredicatedFalse = 1, +neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in +def TFRI_cdnNotPt_V4 : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, s16Ext:$src2), + "if(!$src1.new) $dst = #$src2", + []>, + Requires<[HasV4T]>; + +let AddedComplexity = 50, Predicates = [HasV4T] in 
+def : Pat<(HexagonCONST32_GP tglobaladdr:$src1), + (TFRI_V4 tglobaladdr:$src1)>, + Requires<[HasV4T]>; + + +// Load - Indirect with long offset: These instructions take global address +// as an operand +let isExtended = 1, opExtendable = 3, AddedComplexity = 40, +validSubTargets = HasV4SubT in +def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset), + "$dst=memd($src1<<#$src2+##$offset)", + [(set (i64 DoubleRegs:$dst), + (load (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$offset))))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 40 in +multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> { +let isExtended = 1, opExtendable = 3, validSubTargets = HasV4SubT in + def _lo_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset), + !strconcat("$dst = ", + !strconcat(OpcStr, "($src1<<#$src2+##$offset)")), + [(set IntRegs:$dst, + (i32 (OpNode (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$offset)))))]>, + Requires<[HasV4T]>; +} + +defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>; +defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>; +defm LDriub_ind_anyext : LD_indirect_lo<"memub", extloadi8>; +defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>; +defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>; +defm LDriuh_ind_anyext : LD_indirect_lo<"memuh", extloadi16>; +defm LDriw_ind : LD_indirect_lo<"memw", load>; + +let AddedComplexity = 40 in +def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, + (NumUsesBelowThresCONST32 tglobaladdr:$offset)))), + (i32 (LDrib_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>, + Requires<[HasV4T]>; + +let AddedComplexity = 40 in +def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, + (NumUsesBelowThresCONST32 tglobaladdr:$offset)))), + (i32 (LDriub_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>, + Requires<[HasV4T]>; + +let Predicates = [HasV4T], AddedComplexity = 30 in { +def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), + (STrib_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; + +def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), + (STrih_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; + +def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), + (STriw_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; +} + +let Predicates = [HasV4T], AddedComplexity = 30 in { +def : Pat<(i32 (load u0AlwaysExtPred:$src)), + (LDriw_abs_V4 u0AlwaysExtPred:$src)>; + +def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)), + (LDrib_abs_V4 u0AlwaysExtPred:$src)>; + +def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)), + (LDriub_abs_V4 u0AlwaysExtPred:$src)>; + +def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)), + (LDrih_abs_V4 u0AlwaysExtPred:$src)>; + +def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)), + (LDriuh_abs_V4 u0AlwaysExtPred:$src)>; +} + +// Indexed store word - global address. 
+// memw(Rs+#u6:2)=##global
+let AddedComplexity = 10 in
+def STriw_offset_ext_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_2Imm:$src2, globaladdress:$src3),
+ "memw($src1+#$src2) = ##$src3",
+ [(store (HexagonCONST32 tglobaladdr:$src3),
+ (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+def : Pat<(i64 (ctlz (i64 DoubleRegs:$src1))),
+ (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTLZ64_rr DoubleRegs:$src1))))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(i64 (cttz (i64 DoubleRegs:$src1))),
+ (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTTZ64_rr DoubleRegs:$src1))))>,
+ Requires<[HasV4T]>;
+
+
+// i8 -> i64 loads
+// We need a complexity of 120 here to override the preceding handling of
+// zextloadi8.
+let Predicates = [HasV4T], AddedComplexity = 120 in {
+def: Pat <(i64 (extloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+ (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 tglobaladdr:$addr)))>;
+
+def: Pat <(i64 (zextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+ (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 tglobaladdr:$addr)))>;
+
+def: Pat <(i64 (sextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+ (i64 (SXTW (LDrib_abs_V4 tglobaladdr:$addr)))>;
+
+def: Pat <(i64 (extloadi8 FoldGlobalAddr:$addr)),
+ (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 FoldGlobalAddr:$addr)))>;
+
+def: Pat <(i64 (zextloadi8 FoldGlobalAddr:$addr)),
+ (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 FoldGlobalAddr:$addr)))>;
+
+def: Pat <(i64 (sextloadi8 FoldGlobalAddr:$addr)),
+ (i64 (SXTW (LDrib_abs_V4 FoldGlobalAddr:$addr)))>;
+}
+// i16 -> i64 loads
+// We need a complexity of 120 here to override the preceding handling of
+// zextloadi16.
+let AddedComplexity = 120 in {
+def: Pat <(i64 (extloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+ (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 tglobaladdr:$addr)))>,
+ Requires<[HasV4T]>;
+
+def: Pat <(i64 (zextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+ (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 tglobaladdr:$addr)))>,
+ Requires<[HasV4T]>;
+
+def: Pat <(i64 (sextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+ (i64 (SXTW (LDrih_abs_V4 tglobaladdr:$addr)))>,
+ Requires<[HasV4T]>;
+
+def: Pat <(i64 (extloadi16 FoldGlobalAddr:$addr)),
+ (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 FoldGlobalAddr:$addr)))>,
+ Requires<[HasV4T]>;
+
+def: Pat <(i64 (zextloadi16 FoldGlobalAddr:$addr)),
+ (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 FoldGlobalAddr:$addr)))>,
+ Requires<[HasV4T]>;
+
+def: Pat <(i64 (sextloadi16 FoldGlobalAddr:$addr)),
+ (i64 (SXTW (LDrih_abs_V4 FoldGlobalAddr:$addr)))>,
+ Requires<[HasV4T]>;
+}
+// i32->i64 loads
+// We need a complexity of 120 here to override the preceding handling of
+// zextloadi32.
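+// Roughly, for an i32 global "g" widened to i64:
+//   Rd = memw(##g)          // LDriw_abs_V4
+//   Rdd = combine(#0, Rd)   // COMBINE_Ir_V4, zero/any extend
+//   Rdd = sxtw(Rd)          // SXTW, sign extend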
+let AddedComplexity = 120 in {
+def: Pat <(i64 (extloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+ (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>,
+ Requires<[HasV4T]>;
+
+def: Pat <(i64 (zextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+ (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>,
+ Requires<[HasV4T]>;
+
+def: Pat <(i64 (sextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+ (i64 (SXTW (LDriw_abs_V4 tglobaladdr:$addr)))>,
+ Requires<[HasV4T]>;
+
+def: Pat <(i64 (extloadi32 FoldGlobalAddr:$addr)),
+ (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>,
+ Requires<[HasV4T]>;
+
+def: Pat <(i64 (zextloadi32 FoldGlobalAddr:$addr)),
+ (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>,
+ Requires<[HasV4T]>;
+
+def: Pat <(i64 (sextloadi32 FoldGlobalAddr:$addr)),
+ (i64 (SXTW (LDriw_abs_V4 FoldGlobalAddr:$addr)))>,
+ Requires<[HasV4T]>;
+}
+
+// Indexed store half word - global address.
+// memh(Rs+#u6:1)=##global
+let AddedComplexity = 10 in
+def STrih_offset_ext_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_1Imm:$src2, globaladdress:$src3),
+ "memh($src1+#$src2) = ##$src3",
+ [(truncstorei16 (HexagonCONST32 tglobaladdr:$src3),
+ (add IntRegs:$src1, u6_1ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+// Map from store(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i64 DoubleRegs:$src1),
+ FoldGlobalAddrGP:$addr),
+ (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_64 FoldGlobalAddrGP:$addr,
+ (i64 DoubleRegs:$src1)),
+ (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_8 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_16 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_32 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i64 (load FoldGlobalAddrGP:$addr)),
+ (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_64 FoldGlobalAddrGP:$addr),
+ (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (extloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (sextloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (extloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (sextloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (zextloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_16 FoldGlobalAddrGP:$addr),
+ (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memub(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (zextloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_8 FoldGlobalAddrGP:$addr),
+ (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (load FoldGlobalAddrGP:$addr)),
+ (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr),
+ (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
new file mode 100644
index 000000000000..9da607455811
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -0,0 +1,633 @@
+def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [
+ SDTCisVT<0, f32>,
+ SDTCisPtrTy<1>]>;
+def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set (f32 IntRegs:$dst),
+ (HexagonFCONST32 tglobaladdr:$global))]>,
+ Requires<[HasV5T]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1),
+ "$dst = CONST64(#$src1)",
+ [(set DoubleRegs:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1),
+ "$dst = CONST32(#$src1)",
+ [(set IntRegs:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+// Transfer immediate float.
+// Only works with single-precision fp values.
+// For double precision, use CONST64_Float_Real, as a 64-bit transfer
+// can only hold 40-bit values - 32 from const ext + 8 bit immediate.
+// Make sure that complexity is more than the CONST32 pattern in
+// HexagonInstrInfo.td patterns.
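+// For example (a sketch), materializing 1.5f becomes "Rd = ##1069547520",
+// i.e. the IEEE-754 bit pattern 0x3FC00000 moved via a constant extender.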
+let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1, +isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT, +isCodeGenOnly = 1 in +def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1), + "$dst = #$src1", + [(set IntRegs:$dst, fpimm:$src1)]>, + Requires<[HasV5T]>; + +let isExtended = 1, opExtendable = 2, isPredicated = 1, +neverHasSideEffects = 1, validSubTargets = HasV5SubT in +def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32Ext:$src2), + "if ($src1) $dst = #$src2", + []>, + Requires<[HasV5T]>; + +let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1, +neverHasSideEffects = 1, validSubTargets = HasV5SubT in +def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32Ext:$src2), + "if (!$src1) $dst =#$src2", + []>, + Requires<[HasV5T]>; + +// Convert single precision to double precision and vice-versa. +def CONVERT_sf2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2df($src)", + [(set DoubleRegs:$dst, (fextend IntRegs:$src))]>, + Requires<[HasV5T]>; + +def CONVERT_df2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2sf($src)", + [(set IntRegs:$dst, (fround DoubleRegs:$src))]>, + Requires<[HasV5T]>; + + +// Load. +def LDrid_f : LDInst<(outs DoubleRegs:$dst), + (ins MEMri:$addr), + "$dst = memd($addr)", + [(set DoubleRegs:$dst, (f64 (load ADDRriS11_3:$addr)))]>, + Requires<[HasV5T]>; + + +let AddedComplexity = 20 in +def LDrid_indexed_f : LDInst<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, s11_3Imm:$offset), + "$dst = memd($src1+#$offset)", + [(set DoubleRegs:$dst, (f64 (load (add IntRegs:$src1, + s11_3ImmPred:$offset))))]>, + Requires<[HasV5T]>; + +def LDriw_f : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), "$dst = memw($addr)", + [(set IntRegs:$dst, (f32 (load ADDRriS11_2:$addr)))]>, + Requires<[HasV5T]>; + + +let AddedComplexity = 20 in +def LDriw_indexed_f : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_2Imm:$offset), + "$dst = memw($src1+#$offset)", + [(set IntRegs:$dst, (f32 (load (add IntRegs:$src1, + s11_2ImmPred:$offset))))]>, + Requires<[HasV5T]>; + +// Store. +def STriw_f : STInst<(outs), + (ins MEMri:$addr, IntRegs:$src1), + "memw($addr) = $src1", + [(store (f32 IntRegs:$src1), ADDRriS11_2:$addr)]>, + Requires<[HasV5T]>; + +let AddedComplexity = 10 in +def STriw_indexed_f : STInst<(outs), + (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3), + "memw($src1+#$src2) = $src3", + [(store (f32 IntRegs:$src3), + (add IntRegs:$src1, s11_2ImmPred:$src2))]>, + Requires<[HasV5T]>; + +def STrid_f : STInst<(outs), + (ins MEMri:$addr, DoubleRegs:$src1), + "memd($addr) = $src1", + [(store (f64 DoubleRegs:$src1), ADDRriS11_2:$addr)]>, + Requires<[HasV5T]>; + +// Indexed store double word. 
+let AddedComplexity = 10 in +def STrid_indexed_f : STInst<(outs), + (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3), + "memd($src1+#$src2) = $src3", + [(store (f64 DoubleRegs:$src3), + (add IntRegs:$src1, s11_3ImmPred:$src2))]>, + Requires<[HasV5T]>; + + +// Add +let isCommutable = 1 in +def fADD_rr : ALU64_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = sfadd($src1, $src2)", + [(set IntRegs:$dst, (fadd IntRegs:$src1, IntRegs:$src2))]>, + Requires<[HasV5T]>; + +let isCommutable = 1 in +def fADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = dfadd($src1, $src2)", + [(set DoubleRegs:$dst, (fadd DoubleRegs:$src1, + DoubleRegs:$src2))]>, + Requires<[HasV5T]>; + +def fSUB_rr : ALU64_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = sfsub($src1, $src2)", + [(set IntRegs:$dst, (fsub IntRegs:$src1, IntRegs:$src2))]>, + Requires<[HasV5T]>; + +def fSUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = dfsub($src1, $src2)", + [(set DoubleRegs:$dst, (fsub DoubleRegs:$src1, + DoubleRegs:$src2))]>, + Requires<[HasV5T]>; + +let isCommutable = 1 in +def fMUL_rr : ALU64_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = sfmpy($src1, $src2)", + [(set IntRegs:$dst, (fmul IntRegs:$src1, IntRegs:$src2))]>, + Requires<[HasV5T]>; + +let isCommutable = 1 in +def fMUL64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = dfmpy($src1, $src2)", + [(set DoubleRegs:$dst, (fmul DoubleRegs:$src1, + DoubleRegs:$src2))]>, + Requires<[HasV5T]>; + +// Compare. +let isCompare = 1 in { +multiclass FCMP64_rr<string OpcStr, PatFrag OpNode> { + def _rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set PredRegs:$dst, + (OpNode (f64 DoubleRegs:$b), (f64 DoubleRegs:$c)))]>, + Requires<[HasV5T]>; +} + +multiclass FCMP32_rr<string OpcStr, PatFrag OpNode> { + def _rr : ALU64_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set PredRegs:$dst, + (OpNode (f32 IntRegs:$b), (f32 IntRegs:$c)))]>, + Requires<[HasV5T]>; +} +} + +defm FCMPOEQ64 : FCMP64_rr<"dfcmp.eq", setoeq>; +defm FCMPUEQ64 : FCMP64_rr<"dfcmp.eq", setueq>; +defm FCMPOGT64 : FCMP64_rr<"dfcmp.gt", setogt>; +defm FCMPUGT64 : FCMP64_rr<"dfcmp.gt", setugt>; +defm FCMPOGE64 : FCMP64_rr<"dfcmp.ge", setoge>; +defm FCMPUGE64 : FCMP64_rr<"dfcmp.ge", setuge>; + +defm FCMPOEQ32 : FCMP32_rr<"sfcmp.eq", setoeq>; +defm FCMPUEQ32 : FCMP32_rr<"sfcmp.eq", setueq>; +defm FCMPOGT32 : FCMP32_rr<"sfcmp.gt", setogt>; +defm FCMPUGT32 : FCMP32_rr<"sfcmp.gt", setugt>; +defm FCMPOGE32 : FCMP32_rr<"sfcmp.ge", setoge>; +defm FCMPUGE32 : FCMP32_rr<"sfcmp.ge", setuge>; + +// olt. +def : Pat <(i1 (setolt (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (i1 (FCMPOGT32_rr IntRegs:$src2, IntRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setolt (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (FCMPOGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (i1 (FCMPOGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (FCMPOGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)), + (f64 DoubleRegs:$src1)))>, + Requires<[HasV5T]>; + +// gt. 
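+// Unlike the lt cases above, gt keeps its operand order; the immediate
+// operand is simply materialized first (TFRI_f for f32, CONST64_Float_Real
+// for f64).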
+def : Pat <(i1 (setugt (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (FCMPUGT64_rr (f64 DoubleRegs:$src1), + (f64 (CONST64_Float_Real fpimm:$src2))))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setugt (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (FCMPUGT32_rr (f32 IntRegs:$src1), (f32 (TFRI_f fpimm:$src2))))>, + Requires<[HasV5T]>; + +// ult. +def : Pat <(i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (i1 (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setult (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (FCMPUGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (i1 (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (FCMPUGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)), + (f64 DoubleRegs:$src1)))>, + Requires<[HasV5T]>; + +// le. +// rs <= rt -> rt >= rs. +def : Pat<(i1 (setole (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (i1 (FCMPOGE32_rr IntRegs:$src2, IntRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setole (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (FCMPOGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>, + Requires<[HasV5T]>; + + +// Rss <= Rtt -> Rtt >= Rss. +def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (i1 (FCMPOGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (FCMPOGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)), + DoubleRegs:$src1))>, + Requires<[HasV5T]>; + +// rs <= rt -> rt >= rs. +def : Pat<(i1 (setule (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (i1 (FCMPUGE32_rr IntRegs:$src2, IntRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setule (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (FCMPUGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>, + Requires<[HasV5T]>; + +// Rss <= Rtt -> Rtt >= Rss. +def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (i1 (FCMPUGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (FCMPUGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)), + DoubleRegs:$src1))>, + Requires<[HasV5T]>; + +// ne. 
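+// ne is implemented as a negated equality compare (the idiom used below):
+//   setone(a, b)  ->  NOT_p(FCMPOEQ*_rr(a, b))
+//   setune(a, b)  ->  NOT_p(FCMPUEQ*_rr(a, b))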
+def : Pat<(i1 (setone (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (i1 (NOT_p (FCMPOEQ32_rr IntRegs:$src1, IntRegs:$src2)))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (i1 (NOT_p (FCMPOEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setune (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (i1 (NOT_p (FCMPUEQ32_rr IntRegs:$src1, IntRegs:$src2)))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (i1 (NOT_p (FCMPUEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setone (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (NOT_p (FCMPOEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (NOT_p (FCMPOEQ64_rr DoubleRegs:$src1, + (f64 (CONST64_Float_Real fpimm:$src2)))))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setune (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (NOT_p (FCMPUEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (NOT_p (FCMPUEQ64_rr DoubleRegs:$src1, + (f64 (CONST64_Float_Real fpimm:$src2)))))>, + Requires<[HasV5T]>; + +// Convert Integer to Floating Point. +def CONVERT_d2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_d2sf($src)", + [(set (f32 IntRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_ud2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_ud2sf($src)", + [(set (f32 IntRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_uw2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = convert_uw2sf($src)", + [(set (f32 IntRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_w2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = convert_w2sf($src)", + [(set (f32 IntRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_d2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_d2df($src)", + [(set (f64 DoubleRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_ud2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_ud2df($src)", + [(set (f64 DoubleRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_uw2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_uw2df($src)", + [(set (f64 DoubleRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_w2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_w2df($src)", + [(set (f64 DoubleRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +// Convert Floating Point to Integer - default. 
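+// The default conversions truncate toward zero (the ":chop" modifier),
+// matching C cast semantics; round-to-nearest variants without ":chop"
+// follow below, gated on IEEERndNearV5T.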
+def CONVERT_df2uw : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2uw($src):chop", + [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_df2w : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2w($src):chop", + [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_sf2uw : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2uw($src):chop", + [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_sf2w : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2w($src):chop", + [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_df2d : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2d($src):chop", + [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_df2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2ud($src):chop", + [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_sf2d : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2d($src):chop", + [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_sf2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2ud($src):chop", + [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +// Convert Floating Point to Integer: non-chopped. +let AddedComplexity = 20 in +def CONVERT_df2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2uw($src)", + [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_df2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2w($src)", + [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_sf2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2uw($src)", + [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_sf2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2w($src)", + [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_df2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2d($src)", + [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_df2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2ud($src)", + [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_sf2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2d($src)", + [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_sf2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2ud($src)", + [(set (i64 
DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + + + +// Bitcast is different from [fp|sint|uint]_to_[sint|uint|fp]. +def : Pat <(i32 (bitconvert (f32 IntRegs:$src))), + (i32 (TFR IntRegs:$src))>, + Requires<[HasV5T]>; + +def : Pat <(f32 (bitconvert (i32 IntRegs:$src))), + (f32 (TFR IntRegs:$src))>, + Requires<[HasV5T]>; + +def : Pat <(i64 (bitconvert (f64 DoubleRegs:$src))), + (i64 (TFR64 DoubleRegs:$src))>, + Requires<[HasV5T]>; + +def : Pat <(f64 (bitconvert (i64 DoubleRegs:$src))), + (f64 (TFR64 DoubleRegs:$src))>, + Requires<[HasV5T]>; + +// Floating point fused multiply-add. +def FMADD_dp : ALU64_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), + "$dst += dfmpy($src2, $src3)", + [(set (f64 DoubleRegs:$dst), + (fma DoubleRegs:$src2, DoubleRegs:$src3, DoubleRegs:$src1))], + "$src1 = $dst">, + Requires<[HasV5T]>; + +def FMADD_sp : ALU64_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "$dst += sfmpy($src2, $src3)", + [(set (f32 IntRegs:$dst), + (fma IntRegs:$src2, IntRegs:$src3, IntRegs:$src1))], + "$src1 = $dst">, + Requires<[HasV5T]>; + + +// Floating point max/min. +let AddedComplexity = 100 in +def FMAX_dp : ALU64_rr<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2), + "$dst = dfmax($src1, $src2)", + [(set DoubleRegs:$dst, (f64 (select (i1 (setolt DoubleRegs:$src2, + DoubleRegs:$src1)), + DoubleRegs:$src1, + DoubleRegs:$src2)))]>, + Requires<[HasV5T]>; + +let AddedComplexity = 100 in +def FMAX_sp : ALU64_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = sfmax($src1, $src2)", + [(set IntRegs:$dst, (f32 (select (i1 (setolt IntRegs:$src2, + IntRegs:$src1)), + IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV5T]>; + +let AddedComplexity = 100 in +def FMIN_dp : ALU64_rr<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2), + "$dst = dfmin($src1, $src2)", + [(set DoubleRegs:$dst, (f64 (select (i1 (setogt DoubleRegs:$src2, + DoubleRegs:$src1)), + DoubleRegs:$src1, + DoubleRegs:$src2)))]>, + Requires<[HasV5T]>; + +let AddedComplexity = 100 in +def FMIN_sp : ALU64_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = sfmin($src1, $src2)", + [(set IntRegs:$dst, (f32 (select (i1 (setogt IntRegs:$src2, + IntRegs:$src1)), + IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV5T]>; + +// Pseudo instruction to encode a set of conditional transfers. +// This instruction is used instead of a mux and trades off code size +// for performance. We conduct this transformation optimistically in +// the hope that these instructions get promoted to dot-new transfers.
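+// For example, (f32 (select p0, r2, r3)) is later expanded into a pair of
+// conditional transfers along the lines of:
+//   if (p0) r0 = r2
+//   if (!p0) r0 = r3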
+let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_rr_f : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, + IntRegs:$src2, + IntRegs:$src3), + "Error; should not emit", + [(set IntRegs:$dst, (f32 (select PredRegs:$src1, + IntRegs:$src2, + IntRegs:$src3)))]>, + Requires<[HasV5T]>; + +let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_rr64_f : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, + DoubleRegs:$src2, + DoubleRegs:$src3), + "Error; should not emit", + [(set DoubleRegs:$dst, (f64 (select PredRegs:$src1, + DoubleRegs:$src2, + DoubleRegs:$src3)))]>, + Requires<[HasV5T]>; + + + +let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_ri_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, f32imm:$src3), + "Error; should not emit", + [(set IntRegs:$dst, + (f32 (select PredRegs:$src1, IntRegs:$src2, fpimm:$src3)))]>, + Requires<[HasV5T]>; + +let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_ir_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32imm:$src2, IntRegs:$src3), + "Error; should not emit", + [(set IntRegs:$dst, + (f32 (select PredRegs:$src1, fpimm:$src2, IntRegs:$src3)))]>, + Requires<[HasV5T]>; + +let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_ii_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32imm:$src2, f32imm:$src3), + "Error; should not emit", + [(set IntRegs:$dst, (f32 (select PredRegs:$src1, + fpimm:$src2, + fpimm:$src3)))]>, + Requires<[HasV5T]>; + + +def : Pat <(select (i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (f32 IntRegs:$src3), + (f32 IntRegs:$src4)), + (TFR_condset_rr_f (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1), IntRegs:$src4, + IntRegs:$src3)>, Requires<[HasV5T]>; + +def : Pat <(select (i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (f64 DoubleRegs:$src3), + (f64 DoubleRegs:$src4)), + (TFR_condset_rr64_f (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1), + DoubleRegs:$src4, DoubleRegs:$src3)>, Requires<[HasV5T]>; + +// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). 
+def : Pat <(select (not PredRegs:$src1), fpimm:$src2, fpimm:$src3), + (TFR_condset_ii_f PredRegs:$src1, fpimm:$src3, fpimm:$src2)>; + +// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) +// => r0 = TFR_condset_ri(p0, r1, #i) +def : Pat <(select (not PredRegs:$src1), fpimm:$src2, IntRegs:$src3), + (TFR_condset_ri_f PredRegs:$src1, IntRegs:$src3, fpimm:$src2)>; + +// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) +// => r0 = TFR_condset_ir(p0, #i, r1) +def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, fpimm:$src3), + (TFR_condset_ir_f PredRegs:$src1, fpimm:$src3, IntRegs:$src2)>; + +def : Pat <(i32 (fp_to_sint (f64 DoubleRegs:$src1))), + (i32 (EXTRACT_SUBREG (i64 (CONVERT_df2d (f64 DoubleRegs:$src1))), subreg_loreg))>, + Requires<[HasV5T]>; + +def : Pat <(fabs (f32 IntRegs:$src1)), + (CLRBIT_31 (f32 IntRegs:$src1), 31)>, + Requires<[HasV5T]>; + +def : Pat <(fneg (f32 IntRegs:$src1)), + (TOGBIT_31 (f32 IntRegs:$src1), 31)>, + Requires<[HasV5T]>; + +/* +def : Pat <(fabs (f64 DoubleRegs:$src1)), + (CLRBIT_31 (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>, + Requires<[HasV5T]>; + +def : Pat <(fneg (f64 DoubleRegs:$src1)), + (TOGBIT_31 (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>, + Requires<[HasV5T]>; + */ diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td new file mode 100644 index 000000000000..99f59d5ea669 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td @@ -0,0 +1,3503 @@ +//===-- HexagonIntrinsics.td - Instruction intrinsics ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is populated based on the following specs: +// Hexagon V2 Architecture +// Application-Level Specification +// 80-V9418-8 Rev. B +// March 4, 2008 +//===----------------------------------------------------------------------===// + +// +// ALU 32 types.
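+// Naming convention for the wrapper classes below: the prefix encodes the
+// result type and the suffix the operand types (qi = predicate, si = 32-bit
+// register, di = 64-bit register pair, sN/uN = signed/unsigned N-bit
+// immediate). A typical use later in this file looks like (sketch; the
+// exact names are illustrative):
+//   def Hexagon_A2_add : si_ALU32_sisi<"add", int_hexagon_A2_add>;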
+// + +class qi_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_ALU32_sis10<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_ALU32_sis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_ALU32_siu8<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_ALU32_siu9<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_qisisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_ALU32_qis8si<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, + IntRegs:$src3))]>; + +class si_ALU32_qisis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + s8Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + imm:$src3))]>; + +class si_ALU32_qis8s8<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>; + +class si_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU32_sisi_sat<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU32_sisi_rnd<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU32_sis16<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_sis10<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set 
IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_s10si<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")), + [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>; + +class si_lo_ALU32_siu16<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2), + !strconcat("$dst.l = ", !strconcat(opc , "#$src2")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_hi_ALU32_siu16<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2), + !strconcat("$dst.h = ", !strconcat(opc , "#$src2")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_s16<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1")), + [(set IntRegs:$dst, (IntID imm:$src1))]>; + +class di_ALU32_s8<string opc, Intrinsic IntID> + : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1")), + [(set DoubleRegs:$dst, (IntID imm:$src1))]>; + +class di_ALU64_di<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; + +class si_ALU32_si<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class si_ALU32_si_tfr<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +// +// ALU 64 types. 
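+// In the asm strings, ":sat" marks a saturating result and ":rnd" a rounded
+// one; ".L"/".H" select the low/high 16-bit halfword of a source register.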
+// + +class si_ALU64_si_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class si_ALU64_didi<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class di_ALU64_sidi<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>; + +class di_ALU64_didi<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_qididi<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$src3))]>; + +class di_ALU64_sisi<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_ALU64_didi_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_didi_rnd<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_didi_crnd<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class qi_ALU64_didi<string opc, Intrinsic IntID> + : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class si_ALU64_sisi<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins 
IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):sat:<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.H):sat:<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.L):sat:<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.L):sat:<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")), + [(set 
IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +// +// SInst classes. +// + +class qi_SInst_qi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set PredRegs:$dst, (IntID IntRegs:$src))]>; + +class qi_SInst_qi_pxfer<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set PredRegs:$dst, (IntID IntRegs:$src))]>; + +class qi_SInst_qiqi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_qiqi_neg<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, !$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_SInst_di<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; + +class di_SInst_di_sat<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; + +class si_SInst_di<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>; + +class si_SInst_di_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src):sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>; + +class di_SInst_disi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc 
, "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class di_SInst_didi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class di_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; + +class si_SInst_sisiu3<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + imm:$src3))]>; + +class si_SInst_diu5<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + +class si_SInst_disi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class si_SInst_sidi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>; + +class di_SInst_disisi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class di_SInst_sisi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_siu5<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_SInst_siu6<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_SInst_sisi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class si_SInst_si_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class di_SInst_qi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>; + +class si_SInst_qi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), 
(ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class si_SInst_qiqi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set PredRegs:$dst, (IntID IntRegs:$src))]>; + +class si_SInst_sisi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_SInst_diu6<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + +class si_SInst_siu5<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_SInst_siu5_rnd<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_SInst_siu5u5<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>; + +class si_SInst_sisisi_acc<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisisi_nac<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didisi_acc<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didisi_nac<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, IntRegs:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5u5<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2, u5Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, #$src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2, imm:$src3))], + "$dst2 = $dst">; + +class si_SInst_sisidi<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + DoubleRegs:$src2), + 
!strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6u6<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2, u6Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, #$src2, #$src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2, imm:$src3))], + "$dst2 = $dst">; + +class di_SInst_dididi<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_diu6u6<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2, + u6Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2, + imm:$src3))]>; + +class di_SInst_didiqi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3))]>; + +class di_SInst_didiu3<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + u3Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2, + imm:$src3))]>; + +class di_SInst_didisi_or<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didisi_and<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_and<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2), + !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_or<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2), + !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_xor<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2), + !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisisi_and<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")), + [(set 
IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisisi_or<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + + +class si_SInst_sisiu5_and<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5_or<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5_xor<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5_acc<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5_nac<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_acc<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u5Imm:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, imm:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_nac<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u5Imm:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + + +// +// MInst classes. 
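+// MInst classes wrap the multiply-unit intrinsics; the _acc/_nac/_xacc
+// variants accumulate into the destination ("+=", "-=", "^=") and tie it
+// to an input operand with the "$dst2 = $dst" constraint.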
+// + +class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):<<1:rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.L):<<1:rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.L):rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.H):<<1:rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.H):rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.L):<<1:rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.L):rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_disisi_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), 
(ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2*):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2*):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2*):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_s8s8<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>; + +class si_MInst_sis9<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_MInst_sisi<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_hh<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_lh<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_hl<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set DoubleRegs:$dst, 
(IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_ll<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + + +class si_MInst_sisi_hh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_lh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_hl<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_ll<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_up<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_didi<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_MInst_didi_conj<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, 
$src2*)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2*):<<1:sat")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):<<1:rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_MInst_didi_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class si_SInst_sisi_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_SInst_didi_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class si_SInst_disi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2.L):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2.H):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2*):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2*):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):rnd:sat")), + [(set 
IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisisi_xacc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst += ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst -= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisis8_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + s8Imm:$src3), + !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisis8_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + s8Imm:$src3), + !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisiu4u5<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u4Imm:$src2, u5Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, #$src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2, imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisiu8_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + u8Imm:$src3), + !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisiu8_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + u8Imm:$src3), + !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + 
!strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, 
IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + 
!strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + 
IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*):sat")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_didi_s1_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")), + [(set 
IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):<<1:rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , + "($src1.H, $src2.H):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.H, $src2.L):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID> 
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.H, $src2.L):<<1:rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.H, $src2.L):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.H, $src2.L):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.H):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.H):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.H):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.H):<<1:rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.L):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.L):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.L):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.L):<<1:rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, + DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + 
DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + + +class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1:rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1.H, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), 
+ !strconcat("$dst += ", + !strconcat(opc , "($src1.H, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1.L, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1.L, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", + !strconcat(opc , "($src1.H, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", + !strconcat(opc , "($src1.H, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", + !strconcat(opc , "($src1.L, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", + !strconcat(opc , "($src1.L, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID> + : 
MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disi_s1_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1, $src2):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_didi<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +// +// LDInst classes. +// +let mayLoad = 1, neverHasSideEffects = 1 in +class di_LDInstPI_diu4<string opc, Intrinsic IntID> + : LDInstPI<(outs IntRegs:$dst, DoubleRegs:$dst2), + (ins IntRegs:$src1, IntRegs:$src2, CRRegs:$src3, s4Imm:$offset), + "$dst2 = memd($src1++#$offset:circ($src3))", + [], + "$src1 = $dst">; + +/******************************************************************** +* ALU32/ALU * +*********************************************************************/ + +// ALU32 / ALU / Add. +def HEXAGON_A2_add: + si_ALU32_sisi <"add", int_hexagon_A2_add>; +def HEXAGON_A2_addi: + si_ALU32_sis16 <"add", int_hexagon_A2_addi>; + +// ALU32 / ALU / Logical operations. +def HEXAGON_A2_and: + si_ALU32_sisi <"and", int_hexagon_A2_and>; +def HEXAGON_A2_andir: + si_ALU32_sis10 <"and", int_hexagon_A2_andir>; +def HEXAGON_A2_not: + si_ALU32_si <"not", int_hexagon_A2_not>; +def HEXAGON_A2_or: + si_ALU32_sisi <"or", int_hexagon_A2_or>; +def HEXAGON_A2_orir: + si_ALU32_sis10 <"or", int_hexagon_A2_orir>; +def HEXAGON_A2_xor: + si_ALU32_sisi <"xor", int_hexagon_A2_xor>; + +// ALU32 / ALU / Negate. +def HEXAGON_A2_neg: + si_ALU32_si <"neg", int_hexagon_A2_neg>; + +// ALU32 / ALU / Subtract. +def HEXAGON_A2_sub: + si_ALU32_sisi <"sub", int_hexagon_A2_sub>; +def HEXAGON_A2_subri: + si_ALU32_s10si <"sub", int_hexagon_A2_subri>; + +// ALU32 / ALU / Transfer Immediate. +def HEXAGON_A2_tfril: + si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>; +def HEXAGON_A2_tfrih: + si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>; +def HEXAGON_A2_tfrsi: + si_ALU32_s16 <"", int_hexagon_A2_tfrsi>; +def HEXAGON_A2_tfrpi: + di_ALU32_s8 <"", int_hexagon_A2_tfrpi>; + +// ALU32 / ALU / Transfer Register. +def HEXAGON_A2_tfr: + si_ALU32_si_tfr <"", int_hexagon_A2_tfr>; + +/******************************************************************** +* ALU32/PERM * +*********************************************************************/ + +// ALU32 / PERM / Combine. 
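A note on the wrapper-class naming used throughout this file: the prefix gives the result register class (si = 32-bit IntRegs, di = 64-bit DoubleRegs, qi = predicate) and the suffix lists the source operand types, so si_ALU32_sisi is an ALU32 operation taking two 32-bit sources and producing a 32-bit result. Before the combine definitions announced above, a minimal sketch of how the ALU32/ALU intrinsics just defined are reached from C, assuming Clang's usual __builtin_HEXAGON_<ID> naming for these intrinsic IDs (the helper name is hypothetical and the snippet is illustrative, not part of the patch):

  /* Chain two ALU32/ALU intrinsics; builtin names assume the
     standard Clang mapping from the intrinsic IDs above. */
  static inline int add_then_mask(int a, int b, int m) {
    int s = __builtin_HEXAGON_A2_add(a, b); /* $dst = add($src1, $src2) */
    return __builtin_HEXAGON_A2_and(s, m);  /* $dst = and($src1, $src2) */
  }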
+def HEXAGON_A2_combinew: + di_ALU32_sisi <"combine", int_hexagon_A2_combinew>; +def HEXAGON_A2_combine_hh: + si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>; +def HEXAGON_A2_combine_lh: + si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>; +def HEXAGON_A2_combine_hl: + si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>; +def HEXAGON_A2_combine_ll: + si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>; +def HEXAGON_A2_combineii: + di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>; + +// ALU32 / PERM / Mux. +def HEXAGON_C2_mux: + si_ALU32_qisisi <"mux", int_hexagon_C2_mux>; +def HEXAGON_C2_muxri: + si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>; +def HEXAGON_C2_muxir: + si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>; +def HEXAGON_C2_muxii: + si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>; + +// ALU32 / PERM / Shift halfword. +def HEXAGON_A2_aslh: + si_ALU32_si <"aslh", int_hexagon_A2_aslh>; +def HEXAGON_A2_asrh: + si_ALU32_si <"asrh", int_hexagon_A2_asrh>; +def SI_to_SXTHI_asrh: + si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>; + +// ALU32 / PERM / Sign/zero extend. +def HEXAGON_A2_sxth: + si_ALU32_si <"sxth", int_hexagon_A2_sxth>; +def HEXAGON_A2_sxtb: + si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>; +def HEXAGON_A2_zxth: + si_ALU32_si <"zxth", int_hexagon_A2_zxth>; +def HEXAGON_A2_zxtb: + si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>; + +/******************************************************************** +* ALU32/PRED * +*********************************************************************/ + +// ALU32 / PRED / Compare. +def HEXAGON_C2_cmpeq: + qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>; +def HEXAGON_C2_cmpeqi: + qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>; +def HEXAGON_C2_cmpgei: + qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>; +def HEXAGON_C2_cmpgeui: + qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>; +def HEXAGON_C2_cmpgt: + qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>; +def HEXAGON_C2_cmpgti: + qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>; +def HEXAGON_C2_cmpgtu: + qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>; +def HEXAGON_C2_cmpgtui: + qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>; +def HEXAGON_C2_cmplt: + qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>; +def HEXAGON_C2_cmpltu: + qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>; + +/******************************************************************** +* ALU32/VH * +*********************************************************************/ + +// ALU32 / VH / Vector add halfwords. +// Rd32=vadd[u]h(Rs32,Rt32:sat] +def HEXAGON_A2_svaddh: + si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>; +def HEXAGON_A2_svaddhs: + si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>; +def HEXAGON_A2_svadduhs: + si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>; + +// ALU32 / VH / Vector average halfwords. +def HEXAGON_A2_svavgh: + si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>; +def HEXAGON_A2_svavghs: + si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>; +def HEXAGON_A2_svnavgh: + si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>; + +// ALU32 / VH / Vector subtract halfwords. +def HEXAGON_A2_svsubh: + si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>; +def HEXAGON_A2_svsubhs: + si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>; +def HEXAGON_A2_svsubuhs: + si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>; + +/******************************************************************** +* ALU64/ALU * +*********************************************************************/ + +// ALU64 / ALU / Add. 
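Before the ALU64 additions that follow, a sketch of how the ALU32/PRED compares and C2_mux above compose: the compare produces a predicate (qi) result, which mux then uses to select between two 32-bit sources. At the C level the predicate is materialized as an int (builtin names again assume the standard __builtin_HEXAGON_* mapping; the helper name is hypothetical):

  /* Roughly (a > b) ? a : b, via an explicit predicate. */
  static inline int max_via_mux(int a, int b) {
    int p = __builtin_HEXAGON_C2_cmpgt(a, b); /* $dst = cmp.gt($src1, $src2) */
    return __builtin_HEXAGON_C2_mux(p, a, b); /* $dst = mux(p, $src1, $src2) */
  }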
+def HEXAGON_A2_addp:
+ di_ALU64_didi <"add", int_hexagon_A2_addp>;
+def HEXAGON_A2_addsat:
+ si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>;
+
+// ALU64 / ALU / Add halfword.
+// Even though the definition says hl, the operation is really lh,
+// so DON'T change the class "si_ALU64_sisi_l16_lh" it inherits.
+def HEXAGON_A2_addh_l16_hl:
+ si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>;
+def HEXAGON_A2_addh_l16_ll:
+ si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>;
+
+def HEXAGON_A2_addh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>;
+def HEXAGON_A2_addh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>;
+
+def HEXAGON_A2_addh_h16_hh:
+ si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>;
+def HEXAGON_A2_addh_h16_hl:
+ si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>;
+def HEXAGON_A2_addh_h16_lh:
+ si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>;
+def HEXAGON_A2_addh_h16_ll:
+ si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>;
+
+def HEXAGON_A2_addh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>;
+def HEXAGON_A2_addh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>;
+def HEXAGON_A2_addh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>;
+def HEXAGON_A2_addh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>;
+
+// ALU64 / ALU / Compare.
+def HEXAGON_C2_cmpeqp:
+ qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>;
+def HEXAGON_C2_cmpgtp:
+ qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>;
+def HEXAGON_C2_cmpgtup:
+ qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>;
+
+// ALU64 / ALU / Logical operations.
+def HEXAGON_A2_andp:
+ di_ALU64_didi <"and", int_hexagon_A2_andp>;
+def HEXAGON_A2_orp:
+ di_ALU64_didi <"or", int_hexagon_A2_orp>;
+def HEXAGON_A2_xorp:
+ di_ALU64_didi <"xor", int_hexagon_A2_xorp>;
+
+// ALU64 / ALU / Maximum.
+def HEXAGON_A2_max:
+ si_ALU64_sisi <"max", int_hexagon_A2_max>;
+def HEXAGON_A2_maxu:
+ si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>;
+
+// ALU64 / ALU / Minimum.
+def HEXAGON_A2_min:
+ si_ALU64_sisi <"min", int_hexagon_A2_min>;
+def HEXAGON_A2_minu:
+ si_ALU64_sisi <"minu", int_hexagon_A2_minu>;
+
+// ALU64 / ALU / Subtract.
+def HEXAGON_A2_subp:
+ di_ALU64_didi <"sub", int_hexagon_A2_subp>;
+def HEXAGON_A2_subsat:
+ si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>;
+
+// ALU64 / ALU / Subtract halfword.
+// Even though the definition says hl, the operation is really lh,
+// so DON'T change the class "si_ALU64_sisi_l16_lh" it inherits.
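To restate the note above: the intrinsics named *_hl deliberately inherit the *_lh classes, because the halfword operands in the intrinsic name are swapped relative to the printed assembler syntax; the class, not the name, is what matches the instruction. The plain 64-bit ALU64/ALU forms are more direct; a hedged sketch under the same builtin-naming assumption (hypothetical helper name):

  /* 64-bit register-pair add. */
  static inline long long add64(long long x, long long y) {
    return __builtin_HEXAGON_A2_addp(x, y); /* $dst = add($src1, $src2) */
  }

The subtract-halfword definitions that the note above refers to follow.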
+def HEXAGON_A2_subh_l16_hl: + si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>; +def HEXAGON_A2_subh_l16_ll: + si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>; + +def HEXAGON_A2_subh_l16_sat_hl: + si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>; +def HEXAGON_A2_subh_l16_sat_ll: + si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>; + +def HEXAGON_A2_subh_h16_hh: + si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>; +def HEXAGON_A2_subh_h16_hl: + si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>; +def HEXAGON_A2_subh_h16_lh: + si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>; +def HEXAGON_A2_subh_h16_ll: + si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>; + +def HEXAGON_A2_subh_h16_sat_hh: + si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>; +def HEXAGON_A2_subh_h16_sat_hl: + si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>; +def HEXAGON_A2_subh_h16_sat_lh: + si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>; +def HEXAGON_A2_subh_h16_sat_ll: + si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>; + +// ALU64 / ALU / Transfer register. +def HEXAGON_A2_tfrp: + di_ALU64_di <"", int_hexagon_A2_tfrp>; + +/******************************************************************** +* ALU64/BIT * +*********************************************************************/ + +// ALU64 / BIT / Masked parity. +def HEXAGON_S2_parityp: + si_ALU64_didi <"parity", int_hexagon_S2_parityp>; + +/******************************************************************** +* ALU64/PERM * +*********************************************************************/ + +// ALU64 / PERM / Vector pack high and low halfwords. +def HEXAGON_S2_packhl: + di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>; + +/******************************************************************** +* ALU64/VB * +*********************************************************************/ + +// ALU64 / VB / Vector add unsigned bytes. +def HEXAGON_A2_vaddub: + di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>; +def HEXAGON_A2_vaddubs: + di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>; + +// ALU64 / VB / Vector average unsigned bytes. +def HEXAGON_A2_vavgub: + di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>; +def HEXAGON_A2_vavgubr: + di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>; + +// ALU64 / VB / Vector compare unsigned bytes. +def HEXAGON_A2_vcmpbeq: + qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>; +def HEXAGON_A2_vcmpbgtu: + qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>; + +// ALU64 / VB / Vector maximum/minimum unsigned bytes. +def HEXAGON_A2_vmaxub: + di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>; +def HEXAGON_A2_vminub: + di_ALU64_didi <"vminub", int_hexagon_A2_vminub>; + +// ALU64 / VB / Vector subtract unsigned bytes. +def HEXAGON_A2_vsubub: + di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>; +def HEXAGON_A2_vsububs: + di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>; + +// ALU64 / VB / Vector mux. +def HEXAGON_C2_vmux: + di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>; + + +/******************************************************************** +* ALU64/VH * +*********************************************************************/ + +// ALU64 / VH / Vector add halfwords. 
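The ALU64/VB group above works on eight unsigned byte lanes packed into a 64-bit register pair, so one instruction processes all lanes. A short sketch (same __builtin_HEXAGON_* naming assumption, hypothetical helper name):

  /* Lane-wise average of eight unsigned bytes. */
  static inline long long avg_u8x8(long long x, long long y) {
    return __builtin_HEXAGON_A2_vavgub(x, y); /* $dst = vavgub($src1, $src2) */
  }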
+// Rdd64=vadd[u]h(Rss64,Rtt64)[:sat]
+def HEXAGON_A2_vaddh:
+ di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>;
+def HEXAGON_A2_vaddhs:
+ di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>;
+def HEXAGON_A2_vadduhs:
+ di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>;
+
+// ALU64 / VH / Vector average halfwords.
+// Rdd64=v[n]avg[u]h(Rss64,Rtt64)[:rnd/:crnd][:sat]
+def HEXAGON_A2_vavgh:
+ di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>;
+def HEXAGON_A2_vavghcr:
+ di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>;
+def HEXAGON_A2_vavghr:
+ di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>;
+def HEXAGON_A2_vavguh:
+ di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>;
+def HEXAGON_A2_vavguhr:
+ di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>;
+def HEXAGON_A2_vnavgh:
+ di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>;
+def HEXAGON_A2_vnavghcr:
+ di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>;
+def HEXAGON_A2_vnavghr:
+ di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>;
+
+// ALU64 / VH / Vector compare halfwords.
+def HEXAGON_A2_vcmpheq:
+ qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>;
+def HEXAGON_A2_vcmphgt:
+ qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>;
+def HEXAGON_A2_vcmphgtu:
+ qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>;
+
+// ALU64 / VH / Vector maximum halfwords.
+def HEXAGON_A2_vmaxh:
+ di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>;
+def HEXAGON_A2_vmaxuh:
+ di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>;
+
+// ALU64 / VH / Vector minimum halfwords.
+def HEXAGON_A2_vminh:
+ di_ALU64_didi <"vminh", int_hexagon_A2_vminh>;
+def HEXAGON_A2_vminuh:
+ di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>;
+
+// ALU64 / VH / Vector subtract halfwords.
+def HEXAGON_A2_vsubh:
+ di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>;
+def HEXAGON_A2_vsubhs:
+ di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>;
+def HEXAGON_A2_vsubuhs:
+ di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>;
+
+
+/********************************************************************
+* ALU64/VW *
+*********************************************************************/
+
+// ALU64 / VW / Vector add words.
+// Rdd64=vaddw(Rss64,Rtt64)[:sat]
+def HEXAGON_A2_vaddw:
+ di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>;
+def HEXAGON_A2_vaddws:
+ di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>;
+
+// ALU64 / VW / Vector average words.
+def HEXAGON_A2_vavguw:
+ di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>;
+def HEXAGON_A2_vavguwr:
+ di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>;
+def HEXAGON_A2_vavgw:
+ di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>;
+def HEXAGON_A2_vavgwcr:
+ di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>;
+def HEXAGON_A2_vavgwr:
+ di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>;
+def HEXAGON_A2_vnavgw:
+ di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>;
+def HEXAGON_A2_vnavgwcr:
+ di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>;
+def HEXAGON_A2_vnavgwr:
+ di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>;
+
+// ALU64 / VW / Vector compare words.
+def HEXAGON_A2_vcmpweq:
+ qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>;
+def HEXAGON_A2_vcmpwgt:
+ qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>;
+def HEXAGON_A2_vcmpwgtu:
+ qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>;
+
+// ALU64 / VW / Vector maximum words.
+def HEXAGON_A2_vmaxw:
+ di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>;
+def HEXAGON_A2_vmaxuw:
+ di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>;
+
+// ALU64 / VW / Vector minimum words.
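The _sat class variants above append :sat to the mnemonic, so each 16- or 32-bit lane clamps at its type bounds instead of wrapping. A hedged sketch of the saturating halfword add (builtin name assumed from the intrinsic ID; helper name hypothetical):

  /* Saturating add over four signed 16-bit lanes. */
  static inline long long sat_add_h4(long long x, long long y) {
    return __builtin_HEXAGON_A2_vaddhs(x, y); /* $dst = vaddh($src1, $src2):sat */
  }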
+def HEXAGON_A2_vminw: + di_ALU64_didi <"vminw", int_hexagon_A2_vminw>; +def HEXAGON_A2_vminuw: + di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>; + +// ALU64 / VW / Vector subtract words. +def HEXAGON_A2_vsubw: + di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>; +def HEXAGON_A2_vsubws: + di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>; + + +/******************************************************************** +* CR * +*********************************************************************/ + +// CR / Logical reductions on predicates. +def HEXAGON_C2_all8: + qi_SInst_qi <"all8", int_hexagon_C2_all8>; +def HEXAGON_C2_any8: + qi_SInst_qi <"any8", int_hexagon_C2_any8>; + +// CR / Logical operations on predicates. +def HEXAGON_C2_pxfer_map: + qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>; +def HEXAGON_C2_and: + qi_SInst_qiqi <"and", int_hexagon_C2_and>; +def HEXAGON_C2_andn: + qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>; +def HEXAGON_C2_not: + qi_SInst_qi <"not", int_hexagon_C2_not>; +def HEXAGON_C2_or: + qi_SInst_qiqi <"or", int_hexagon_C2_or>; +def HEXAGON_C2_orn: + qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>; +def HEXAGON_C2_xor: + qi_SInst_qiqi <"xor", int_hexagon_C2_xor>; + + +/******************************************************************** +* MTYPE/ALU * +*********************************************************************/ + +// MTYPE / ALU / Add and accumulate. +def HEXAGON_M2_acci: + si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>; +def HEXAGON_M2_accii: + si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>; +def HEXAGON_M2_nacci: + si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>; +def HEXAGON_M2_naccii: + si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>; + +// MTYPE / ALU / Subtract and accumulate. +def HEXAGON_M2_subacc: + si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>; + +// MTYPE / ALU / Vector absolute difference. +def HEXAGON_M2_vabsdiffh: + di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>; +def HEXAGON_M2_vabsdiffw: + di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>; + +// MTYPE / ALU / XOR and xor with destination. +def HEXAGON_M2_xor_xacc: + si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>; + + +/******************************************************************** +* MTYPE/COMPLEX * +*********************************************************************/ + +// MTYPE / COMPLEX / Complex multiply. 
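Before the complex-multiply block that follows, note the accumulator convention used by the MTYPE/ALU forms just defined: every MInst_acc class ties the accumulator input to the output with the "$dst2 = $dst" constraint, so at the intrinsic level the running value is passed in and returned explicitly. A sketch (standard builtin-naming assumption, hypothetical helper name):

  /* acc += add(a, b); the accumulator threads through the call. */
  static inline int acc_add(int acc, int a, int b) {
    return __builtin_HEXAGON_M2_acci(acc, a, b);
  }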
+// Rdd[-+]=cmpy(Rs, Rt)[:<<1]:sat
+def HEXAGON_M2_cmpys_s1:
+ di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>;
+def HEXAGON_M2_cmpys_s0:
+ di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>;
+def HEXAGON_M2_cmpysc_s1:
+ di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>;
+def HEXAGON_M2_cmpysc_s0:
+ di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>;
+
+def HEXAGON_M2_cmacs_s1:
+ di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>;
+def HEXAGON_M2_cmacs_s0:
+ di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>;
+def HEXAGON_M2_cmacsc_s1:
+ di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>;
+def HEXAGON_M2_cmacsc_s0:
+ di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>;
+
+def HEXAGON_M2_cnacs_s1:
+ di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>;
+def HEXAGON_M2_cnacs_s0:
+ di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>;
+def HEXAGON_M2_cnacsc_s1:
+ di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>;
+def HEXAGON_M2_cnacsc_s0:
+ di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>;
+
+// MTYPE / COMPLEX / Complex multiply real or imaginary.
+def HEXAGON_M2_cmpyr_s0:
+ di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>;
+def HEXAGON_M2_cmacr_s0:
+ di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>;
+
+def HEXAGON_M2_cmpyi_s0:
+ di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>;
+def HEXAGON_M2_cmaci_s0:
+ di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>;
+
+// MTYPE / COMPLEX / Complex multiply with round and pack.
+// Rd32=cmpy(Rs32,[*]Rt32)[:<<1]:rnd:sat
+def HEXAGON_M2_cmpyrs_s0:
+ si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>;
+def HEXAGON_M2_cmpyrs_s1:
+ si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>;
+
+def HEXAGON_M2_cmpyrsc_s0:
+ si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>;
+def HEXAGON_M2_cmpyrsc_s1:
+ si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>;
+
+// MTYPE / COMPLEX / Vector complex multiply real or imaginary.
+def HEXAGON_M2_vcmpy_s0_sat_i:
+ di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>;
+def HEXAGON_M2_vcmpy_s1_sat_i:
+ di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>;
+
+def HEXAGON_M2_vcmpy_s0_sat_r:
+ di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>;
+def HEXAGON_M2_vcmpy_s1_sat_r:
+ di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>;
+
+def HEXAGON_M2_vcmac_s0_sat_i:
+ di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>;
+def HEXAGON_M2_vcmac_s0_sat_r:
+ di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>;
+
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
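The cmpy family above treats each 32-bit source as a packed complex value, one 16-bit component per halfword, and produces or accumulates a 64-bit result. A hedged sketch of the saturating accumulate form (builtin name assumed from the intrinsic ID; helper name hypothetical):

  /* acc += cmpy(x, y):sat, a 16-bit complex multiply-accumulate. */
  static inline long long cmac(long long acc, int x, int y) {
    return __builtin_HEXAGON_M2_cmacs_s0(acc, x, y);
  }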
+def HEXAGON_M2_vrcmpyi_s0: + di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>; +def HEXAGON_M2_vrcmpyr_s0: + di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>; + +def HEXAGON_M2_vrcmpyi_s0c: + di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>; +def HEXAGON_M2_vrcmpyr_s0c: + di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>; + +def HEXAGON_M2_vrcmaci_s0: + di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>; +def HEXAGON_M2_vrcmacr_s0: + di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>; + +def HEXAGON_M2_vrcmaci_s0c: + di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>; +def HEXAGON_M2_vrcmacr_s0c: + di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>; + + +/******************************************************************** +* MTYPE/MPYH * +*********************************************************************/ + +// MTYPE / MPYH / Multiply and use lower result. +//def HEXAGON_M2_mpysmi: +//FIXME: Hexagon_M2_mpysmi should really be of the type si_MInst_sim9, +// not si_MInst_sis9 - but for now, we will use s9. +// def Hexagon_M2_mpysmi: +// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>; +def Hexagon_M2_mpysmi: + si_MInst_sis9 <"mpyi", int_hexagon_M2_mpysmi>; +def HEXAGON_M2_mpyi: + si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>; +def HEXAGON_M2_mpyui: + si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>; +def HEXAGON_M2_macsip: + si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>; +def HEXAGON_M2_maci: + si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>; +def HEXAGON_M2_macsin: + si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>; + +// MTYPE / MPYH / Multiply word by half (32x16). +//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat] +//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat] +def HEXAGON_M2_mmpyl_rs1: + di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>; +def HEXAGON_M2_mmpyl_s1: + di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>; +def HEXAGON_M2_mmpyl_rs0: + di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>; +def HEXAGON_M2_mmpyl_s0: + di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>; +def HEXAGON_M2_mmpyh_rs1: + di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>; +def HEXAGON_M2_mmpyh_s1: + di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>; +def HEXAGON_M2_mmpyh_rs0: + di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>; +def HEXAGON_M2_mmpyh_s0: + di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>; +def HEXAGON_M2_mmacls_rs1: + di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>; +def HEXAGON_M2_mmacls_s1: + di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>; +def HEXAGON_M2_mmacls_rs0: + di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>; +def HEXAGON_M2_mmacls_s0: + di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>; +def HEXAGON_M2_mmachs_rs1: + di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>; +def HEXAGON_M2_mmachs_s1: + di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>; +def HEXAGON_M2_mmachs_rs0: + di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>; +def HEXAGON_M2_mmachs_s0: + di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>; + +// MTYPE / MPYH / Multiply word by unsigned half (32x16).
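+// Editor's sketch for the vector word-by-unsigned-half multiplies below (same
+// __builtin_HEXAGON_* assumption as above; names/types illustrative):
+//   long long vmpyweuh_sat(long long rss, long long rtt) {
+//     /* Rdd = vmpyweuh(Rss, Rtt):sat -- int_hexagon_M2_mmpyul_s0 */
+//     return __builtin_HEXAGON_M2_mmpyul_s0(rss, rtt);
+//   }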
+//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat] +//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat] +def HEXAGON_M2_mmpyul_rs1: + di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>; +def HEXAGON_M2_mmpyul_s1: + di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>; +def HEXAGON_M2_mmpyul_rs0: + di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>; +def HEXAGON_M2_mmpyul_s0: + di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>; +def HEXAGON_M2_mmpyuh_rs1: + di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>; +def HEXAGON_M2_mmpyuh_s1: + di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>; +def HEXAGON_M2_mmpyuh_rs0: + di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>; +def HEXAGON_M2_mmpyuh_s0: + di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>; +def HEXAGON_M2_mmaculs_rs1: + di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>; +def HEXAGON_M2_mmaculs_s1: + di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>; +def HEXAGON_M2_mmaculs_rs0: + di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>; +def HEXAGON_M2_mmaculs_s0: + di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>; +def HEXAGON_M2_mmacuhs_rs1: + di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>; +def HEXAGON_M2_mmacuhs_s1: + di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>; +def HEXAGON_M2_mmacuhs_rs0: + di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>; +def HEXAGON_M2_mmacuhs_s0: + di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>; + +// MTYPE / MPYH / Multiply and use upper result. +def HEXAGON_M2_hmmpyh_rs1: + si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>; +def HEXAGON_M2_hmmpyl_rs1: + si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>; +def HEXAGON_M2_mpy_up: + si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>; +def HEXAGON_M2_dpmpyss_rnd_s0: + si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>; +def HEXAGON_M2_mpyu_up: + si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>; + +// MTYPE / MPYH / Multiply and use full result. +def HEXAGON_M2_dpmpyuu_s0: + di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>; +def HEXAGON_M2_dpmpyuu_acc_s0: + di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>; +def HEXAGON_M2_dpmpyuu_nac_s0: + di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>; +def HEXAGON_M2_dpmpyss_s0: + di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>; +def HEXAGON_M2_dpmpyss_acc_s0: + di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>; +def HEXAGON_M2_dpmpyss_nac_s0: + di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>; + + +/******************************************************************** +* MTYPE/MPYS * +*********************************************************************/ + +// MTYPE / MPYS / Scalar 16x16 multiply signed. 
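+// Editor's note: the suffixes encode operand halves and modifiers: hh/hl/lh/ll
+// pick the high or low 16 bits of each source, s0/s1 select :<<0 or :<<1, and
+// rnd/sat add :rnd/:sat. A sketch, assuming the __builtin_HEXAGON_* mapping:
+//   int mpy_hi_hi(int rs, int rt) {
+//     /* Rd = mpy(Rs.H, Rt.H):<<1:rnd:sat -- int_hexagon_M2_mpy_sat_rnd_hh_s1 */
+//     return __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s1(rs, rt);
+//   }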
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:rnd|:sat|:rnd:sat] +def HEXAGON_M2_mpy_hh_s0: + si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>; +def HEXAGON_M2_mpy_hh_s1: + si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>; +def HEXAGON_M2_mpy_rnd_hh_s1: + si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>; +def HEXAGON_M2_mpy_sat_rnd_hh_s1: + si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>; +def HEXAGON_M2_mpy_sat_hh_s1: + si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>; +def HEXAGON_M2_mpy_rnd_hh_s0: + si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>; +def HEXAGON_M2_mpy_sat_rnd_hh_s0: + si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>; +def HEXAGON_M2_mpy_sat_hh_s0: + si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>; + +def HEXAGON_M2_mpy_hl_s0: + si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>; +def HEXAGON_M2_mpy_hl_s1: + si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>; +def HEXAGON_M2_mpy_rnd_hl_s1: + si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>; +def HEXAGON_M2_mpy_sat_rnd_hl_s1: + si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>; +def HEXAGON_M2_mpy_sat_hl_s1: + si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>; +def HEXAGON_M2_mpy_rnd_hl_s0: + si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>; +def HEXAGON_M2_mpy_sat_rnd_hl_s0: + si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>; +def HEXAGON_M2_mpy_sat_hl_s0: + si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>; + +def HEXAGON_M2_mpy_lh_s0: + si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>; +def HEXAGON_M2_mpy_lh_s1: + si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>; +def HEXAGON_M2_mpy_rnd_lh_s1: + si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>; +def HEXAGON_M2_mpy_sat_rnd_lh_s1: + si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>; +def HEXAGON_M2_mpy_sat_lh_s1: + si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>; +def HEXAGON_M2_mpy_rnd_lh_s0: + si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>; +def HEXAGON_M2_mpy_sat_rnd_lh_s0: + si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>; +def HEXAGON_M2_mpy_sat_lh_s0: + si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>; + +def HEXAGON_M2_mpy_ll_s0: + si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>; +def HEXAGON_M2_mpy_ll_s1: + si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>; +def HEXAGON_M2_mpy_rnd_ll_s1: + si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>; +def HEXAGON_M2_mpy_sat_rnd_ll_s1: + si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>; +def HEXAGON_M2_mpy_sat_ll_s1: + si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>; +def HEXAGON_M2_mpy_rnd_ll_s0: + si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>; +def HEXAGON_M2_mpy_sat_rnd_ll_s0: + si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>; +def HEXAGON_M2_mpy_sat_ll_s0: + si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>; + +//Rdd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:rnd|:<<1:rnd]] +def HEXAGON_M2_mpyd_hh_s0: + di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>; +def HEXAGON_M2_mpyd_hh_s1: + di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>; +def HEXAGON_M2_mpyd_rnd_hh_s1: + di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>; +def HEXAGON_M2_mpyd_rnd_hh_s0: + di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>; +
+def HEXAGON_M2_mpyd_hl_s0: + di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>; +def HEXAGON_M2_mpyd_hl_s1: + di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>; +def HEXAGON_M2_mpyd_rnd_hl_s1: + di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>; +def HEXAGON_M2_mpyd_rnd_hl_s0: + di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>; + +def HEXAGON_M2_mpyd_lh_s0: + di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>; +def HEXAGON_M2_mpyd_lh_s1: + di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>; +def HEXAGON_M2_mpyd_rnd_lh_s1: + di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>; +def HEXAGON_M2_mpyd_rnd_lh_s0: + di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>; + +def HEXAGON_M2_mpyd_ll_s0: + di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>; +def HEXAGON_M2_mpyd_ll_s1: + di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>; +def HEXAGON_M2_mpyd_rnd_ll_s1: + di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>; +def HEXAGON_M2_mpyd_rnd_ll_s0: + di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>; + +//Rx+=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]] +def HEXAGON_M2_mpy_acc_hh_s0: + si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>; +def HEXAGON_M2_mpy_acc_hh_s1: + si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>; +def HEXAGON_M2_mpy_acc_sat_hh_s1: + si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>; +def HEXAGON_M2_mpy_acc_sat_hh_s0: + si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>; + +def HEXAGON_M2_mpy_acc_hl_s0: + si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>; +def HEXAGON_M2_mpy_acc_hl_s1: + si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>; +def HEXAGON_M2_mpy_acc_sat_hl_s1: + si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>; +def HEXAGON_M2_mpy_acc_sat_hl_s0: + si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>; + +def HEXAGON_M2_mpy_acc_lh_s0: + si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>; +def HEXAGON_M2_mpy_acc_lh_s1: + si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>; +def HEXAGON_M2_mpy_acc_sat_lh_s1: + si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>; +def HEXAGON_M2_mpy_acc_sat_lh_s0: + si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>; + +def HEXAGON_M2_mpy_acc_ll_s0: + si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>; +def HEXAGON_M2_mpy_acc_ll_s1: + si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>; +def HEXAGON_M2_mpy_acc_sat_ll_s1: + si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>; +def HEXAGON_M2_mpy_acc_sat_ll_s0: + si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>; + +//Rx-=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]] +def HEXAGON_M2_mpy_nac_hh_s0: + si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>; +def HEXAGON_M2_mpy_nac_hh_s1: + si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>; +def HEXAGON_M2_mpy_nac_sat_hh_s1: + si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>; +def HEXAGON_M2_mpy_nac_sat_hh_s0: + si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>; + +def HEXAGON_M2_mpy_nac_hl_s0: + si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>; +def HEXAGON_M2_mpy_nac_hl_s1: + si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>; +def HEXAGON_M2_mpy_nac_sat_hl_s1: + si_MInst_sisisi_nac_sat_hl_s1 <"mpy",
int_hexagon_M2_mpy_nac_sat_hl_s1>; +def HEXAGON_M2_mpy_nac_sat_hl_s0: + si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>; + +def HEXAGON_M2_mpy_nac_lh_s0: + si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>; +def HEXAGON_M2_mpy_nac_lh_s1: + si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>; +def HEXAGON_M2_mpy_nac_sat_lh_s1: + si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>; +def HEXAGON_M2_mpy_nac_sat_lh_s0: + si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>; + +def HEXAGON_M2_mpy_nac_ll_s0: + si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>; +def HEXAGON_M2_mpy_nac_ll_s1: + si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>; +def HEXAGON_M2_mpy_nac_sat_ll_s1: + si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>; +def HEXAGON_M2_mpy_nac_sat_ll_s0: + si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>; + +//Rx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def HEXAGON_M2_mpyd_acc_hh_s0: + di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>; +def HEXAGON_M2_mpyd_acc_hh_s1: + di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>; + +def HEXAGON_M2_mpyd_acc_hl_s0: + di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>; +def HEXAGON_M2_mpyd_acc_hl_s1: + di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>; + +def HEXAGON_M2_mpyd_acc_lh_s0: + di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>; +def HEXAGON_M2_mpyd_acc_lh_s1: + di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>; + +def HEXAGON_M2_mpyd_acc_ll_s0: + di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>; +def HEXAGON_M2_mpyd_acc_ll_s1: + di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>; + +//Rx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def HEXAGON_M2_mpyd_nac_hh_s0: + di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>; +def HEXAGON_M2_mpyd_nac_hh_s1: + di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>; + +def HEXAGON_M2_mpyd_nac_hl_s0: + di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>; +def HEXAGON_M2_mpyd_nac_hl_s1: + di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>; + +def HEXAGON_M2_mpyd_nac_lh_s0: + di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>; +def HEXAGON_M2_mpyd_nac_lh_s1: + di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>; + +def HEXAGON_M2_mpyd_nac_ll_s0: + di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>; +def HEXAGON_M2_mpyd_nac_ll_s1: + di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>; + +// MTYPE / MPYS / Scalar 16x16 multiply unsigned.
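+// Editor's sketch for the unsigned accumulating forms below (same builtin
+// assumption as above); acc/nac pick += and -= on the destination:
+//   int mac_u16(int rx, int rs, int rt) {
+//     /* Rx += mpyu(Rs.L, Rt.L) -- int_hexagon_M2_mpyu_acc_ll_s0 */
+//     return __builtin_HEXAGON_M2_mpyu_acc_ll_s0(rx, rs, rt);
+//   }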
+//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def HEXAGON_M2_mpyu_hh_s0: + si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>; +def HEXAGON_M2_mpyu_hh_s1: + si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>; +def HEXAGON_M2_mpyu_hl_s0: + si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>; +def HEXAGON_M2_mpyu_hl_s1: + si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>; +def HEXAGON_M2_mpyu_lh_s0: + si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>; +def HEXAGON_M2_mpyu_lh_s1: + si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>; +def HEXAGON_M2_mpyu_ll_s0: + si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>; +def HEXAGON_M2_mpyu_ll_s1: + si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>; + +//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def HEXAGON_M2_mpyud_hh_s0: + di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>; +def HEXAGON_M2_mpyud_hh_s1: + di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>; +def HEXAGON_M2_mpyud_hl_s0: + di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>; +def HEXAGON_M2_mpyud_hl_s1: + di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>; +def HEXAGON_M2_mpyud_lh_s0: + di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>; +def HEXAGON_M2_mpyud_lh_s1: + di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>; +def HEXAGON_M2_mpyud_ll_s0: + di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>; +def HEXAGON_M2_mpyud_ll_s1: + di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>; + +//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def HEXAGON_M2_mpyu_acc_hh_s0: + si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>; +def HEXAGON_M2_mpyu_acc_hh_s1: + si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>; +def HEXAGON_M2_mpyu_acc_hl_s0: + si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>; +def HEXAGON_M2_mpyu_acc_hl_s1: + si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>; +def HEXAGON_M2_mpyu_acc_lh_s0: + si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>; +def HEXAGON_M2_mpyu_acc_lh_s1: + si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>; +def HEXAGON_M2_mpyu_acc_ll_s0: + si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>; +def HEXAGON_M2_mpyu_acc_ll_s1: + si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>; + +//Rd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def HEXAGON_M2_mpyu_nac_hh_s0: + si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>; +def HEXAGON_M2_mpyu_nac_hh_s1: + si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>; +def HEXAGON_M2_mpyu_nac_hl_s0: + si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>; +def HEXAGON_M2_mpyu_nac_hl_s1: + si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>; +def HEXAGON_M2_mpyu_nac_lh_s0: + si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>; +def HEXAGON_M2_mpyu_nac_lh_s1: + si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>; +def HEXAGON_M2_mpyu_nac_ll_s0: + si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>; +def HEXAGON_M2_mpyu_nac_ll_s1: + si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>; + +//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def HEXAGON_M2_mpyud_acc_hh_s0: + di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>; +def HEXAGON_M2_mpyud_acc_hh_s1: + di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>; +def HEXAGON_M2_mpyud_acc_hl_s0: + di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>; +def HEXAGON_M2_mpyud_acc_hl_s1: +
di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>; +def HEXAGON_M2_mpyud_acc_lh_s0: + di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>; +def HEXAGON_M2_mpyud_acc_lh_s1: + di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>; +def HEXAGON_M2_mpyud_acc_ll_s0: + di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>; +def HEXAGON_M2_mpyud_acc_ll_s1: + di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>; + +//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def HEXAGON_M2_mpyud_nac_hh_s0: + di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>; +def HEXAGON_M2_mpyud_nac_hh_s1: + di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>; +def HEXAGON_M2_mpyud_nac_hl_s0: + di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>; +def HEXAGON_M2_mpyud_nac_hl_s1: + di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>; +def HEXAGON_M2_mpyud_nac_lh_s0: + di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>; +def HEXAGON_M2_mpyud_nac_lh_s1: + di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>; +def HEXAGON_M2_mpyud_nac_ll_s0: + di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>; +def HEXAGON_M2_mpyud_nac_ll_s1: + di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>; + + +/******************************************************************** +* MTYPE/VB * +*********************************************************************/ + +// MTYPE / VB / Vector reduce add unsigned bytes. +def HEXAGON_A2_vraddub: + di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>; +def HEXAGON_A2_vraddub_acc: + di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>; + +// MTYPE / VB / Vector sum of absolute differences unsigned bytes. +def HEXAGON_A2_vrsadub: + di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>; +def HEXAGON_A2_vrsadub_acc: + di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>; + +/******************************************************************** +* MTYPE/VH * +*********************************************************************/ + +// MTYPE / VH / Vector dual multiply. +def HEXAGON_M2_vdmpys_s1: + di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>; +def HEXAGON_M2_vdmpys_s0: + di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>; +def HEXAGON_M2_vdmacs_s1: + di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>; +def HEXAGON_M2_vdmacs_s0: + di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>; + +// MTYPE / VH / Vector dual multiply with round and pack. +def HEXAGON_M2_vdmpyrs_s0: + si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>; +def HEXAGON_M2_vdmpyrs_s1: + si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>; + +// MTYPE / VH / Vector multiply even halfwords. +def HEXAGON_M2_vmpy2es_s1: + di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>; +def HEXAGON_M2_vmpy2es_s0: + di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>; +def HEXAGON_M2_vmac2es: + di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>; +def HEXAGON_M2_vmac2es_s1: + di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>; +def HEXAGON_M2_vmac2es_s0: + di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>; + +// MTYPE / VH / Vector multiply halfwords. 
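+// Editor's sketch for the halfword vector multiplies below (same builtin
+// assumption as above); vmpyh multiplies the two 16-bit halves of each source
+// into a 64-bit pair:
+//   long long vmpyh_sat(int rs, int rt) {
+//     /* Rdd = vmpyh(Rs, Rt):sat -- int_hexagon_M2_vmpy2s_s0 */
+//     return __builtin_HEXAGON_M2_vmpy2s_s0(rs, rt);
+//   }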
+def HEXAGON_M2_vmpy2s_s0: + di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>; +def HEXAGON_M2_vmpy2s_s1: + di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>; +def HEXAGON_M2_vmac2: + di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>; +def HEXAGON_M2_vmac2s_s0: + di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>; +def HEXAGON_M2_vmac2s_s1: + di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>; + +// MTYPE / VH / Vector multiply halfwords with round and pack. +def HEXAGON_M2_vmpy2s_s0pack: + si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>; +def HEXAGON_M2_vmpy2s_s1pack: + si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>; + +// MTYPE / VH / Vector reduce multiply halfwords. +// Rxx32+=vrmpyh(Rss32,Rtt32) +def HEXAGON_M2_vrmpy_s0: + di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>; +def HEXAGON_M2_vrmac_s0: + di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>; + + +/******************************************************************** +* STYPE/ALU * +*********************************************************************/ + +// STYPE / ALU / Absolute value. +def HEXAGON_A2_abs: + si_SInst_si <"abs", int_hexagon_A2_abs>; +def HEXAGON_A2_absp: + di_SInst_di <"abs", int_hexagon_A2_absp>; +def HEXAGON_A2_abssat: + si_SInst_si_sat <"abs", int_hexagon_A2_abssat>; + +// STYPE / ALU / Negate. +def HEXAGON_A2_negp: + di_SInst_di <"neg", int_hexagon_A2_negp>; +def HEXAGON_A2_negsat: + si_SInst_si_sat <"neg", int_hexagon_A2_negsat>; + +// STYPE / ALU / Logical Not. +def HEXAGON_A2_notp: + di_SInst_di <"not", int_hexagon_A2_notp>; + +// STYPE / ALU / Sign extend word to doubleword. +def HEXAGON_A2_sxtw: + di_SInst_si <"sxtw", int_hexagon_A2_sxtw>; + + +/******************************************************************** +* STYPE/BIT * +*********************************************************************/ + +// STYPE / BIT / Count leading. +def HEXAGON_S2_cl0: + si_SInst_si <"cl0", int_hexagon_S2_cl0>; +def HEXAGON_S2_cl0p: + si_SInst_di <"cl0", int_hexagon_S2_cl0p>; +def HEXAGON_S2_cl1: + si_SInst_si <"cl1", int_hexagon_S2_cl1>; +def HEXAGON_S2_cl1p: + si_SInst_di <"cl1", int_hexagon_S2_cl1p>; +def HEXAGON_S2_clb: + si_SInst_si <"clb", int_hexagon_S2_clb>; +def HEXAGON_S2_clbp: + si_SInst_di <"clb", int_hexagon_S2_clbp>; +def HEXAGON_S2_clbnorm: + si_SInst_si <"normamt", int_hexagon_S2_clbnorm>; + +// STYPE / BIT / Count trailing. +def HEXAGON_S2_ct0: + si_SInst_si <"ct0", int_hexagon_S2_ct0>; +def HEXAGON_S2_ct1: + si_SInst_si <"ct1", int_hexagon_S2_ct1>; + +// STYPE / BIT / Compare bit mask. +def Hexagon_C2_bitsclr: + qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>; +def Hexagon_C2_bitsclri: + qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>; +def Hexagon_C2_bitsset: + qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>; + +// STYPE / BIT / Extract unsigned. +// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm]) +def HEXAGON_S2_extractu: + si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>; +def HEXAGON_S2_extractu_rp: + si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>; +def HEXAGON_S2_extractup: + di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>; +def HEXAGON_S2_extractup_rp: + di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>; + +// STYPE / BIT / Insert bitfield. 
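+// Editor's sketch: insert(Rs,#width,#offset) deposits the low #width bits of
+// Rs into the destination at #offset. Builtin name and argument order are
+// assumed from the sisiu5u5 operand order below, not stated by this patch:
+//   int set_nibble(int rx, int rs) {
+//     /* Rx = insert(Rs, #4, #8) -- int_hexagon_S2_insert */
+//     return __builtin_HEXAGON_S2_insert(rx, rs, 4, 8);
+//   }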
+def Hexagon_S2_insert: + si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>; +def Hexagon_S2_insert_rp: + si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>; +def Hexagon_S2_insertp: + di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>; +def Hexagon_S2_insertp_rp: + di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>; + +// STYPE / BIT / Interleave/deinterleave. +def Hexagon_S2_interleave: + di_SInst_di <"interleave", int_hexagon_S2_interleave>; +def Hexagon_S2_deinterleave: + di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>; + +// STYPE / BIT / Linear feedback-shift iteration. +def Hexagon_S2_lfsp: + di_SInst_didi <"lfs", int_hexagon_S2_lfsp>; + +// STYPE / BIT / Bit reverse. +def Hexagon_S2_brev: + si_SInst_si <"brev", int_hexagon_S2_brev>; + +// STYPE / BIT / Set/Clear/Toggle Bit. +def HEXAGON_S2_setbit_i: + si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>; +def HEXAGON_S2_togglebit_i: + si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>; +def HEXAGON_S2_clrbit_i: + si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>; +def HEXAGON_S2_setbit_r: + si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>; +def HEXAGON_S2_togglebit_r: + si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>; +def HEXAGON_S2_clrbit_r: + si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>; + +// STYPE / BIT / Test Bit. +def HEXAGON_S2_tstbit_i: + qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>; +def HEXAGON_S2_tstbit_r: + qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>; + + +/******************************************************************** +* STYPE/COMPLEX * +*********************************************************************/ + +// STYPE / COMPLEX / Vector Complex conjugate. +def HEXAGON_A2_vconj: + di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>; + +// STYPE / COMPLEX / Vector Complex rotate. +def HEXAGON_S2_vcrotate: + di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>; + + +/******************************************************************** +* STYPE/PERM * +*********************************************************************/ + +// STYPE / PERM / Saturate. +def HEXAGON_A2_sat: + si_SInst_di <"sat", int_hexagon_A2_sat>; +def HEXAGON_A2_satb: + si_SInst_si <"satb", int_hexagon_A2_satb>; +def HEXAGON_A2_sath: + si_SInst_si <"sath", int_hexagon_A2_sath>; +def HEXAGON_A2_satub: + si_SInst_si <"satub", int_hexagon_A2_satub>; +def HEXAGON_A2_satuh: + si_SInst_si <"satuh", int_hexagon_A2_satuh>; + +// STYPE / PERM / Swizzle bytes. +def HEXAGON_A2_swiz: + si_SInst_si <"swiz", int_hexagon_A2_swiz>; + +// STYPE / PERM / Vector align. +// Need custom lowering +def HEXAGON_S2_valignib: + di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>; +def HEXAGON_S2_valignrb: + di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>; + +// STYPE / PERM / Vector round and pack. +def HEXAGON_S2_vrndpackwh: + si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>; +def HEXAGON_S2_vrndpackwhs: + si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>; + +// STYPE / PERM / Vector saturate and pack. +def HEXAGON_S2_svsathb: + si_SInst_si <"vsathb", int_hexagon_S2_svsathb>; +def HEXAGON_S2_vsathb: + si_SInst_di <"vsathb", int_hexagon_S2_vsathb>; +def HEXAGON_S2_svsathub: + si_SInst_si <"vsathub", int_hexagon_S2_svsathub>; +def HEXAGON_S2_vsathub: + si_SInst_di <"vsathub", int_hexagon_S2_vsathub>; +def HEXAGON_S2_vsatwh: + si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>; +def HEXAGON_S2_vsatwuh: + si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>; + +// STYPE / PERM / Vector saturate without pack.
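+// Editor's sketch for the _nopack forms below, which saturate each lane in
+// place and keep the 64-bit shape (same builtin assumption as above):
+//   long long sat_h_to_b(long long rss) {
+//     /* Rdd = vsathb(Rss), no packing -- int_hexagon_S2_vsathb_nopack */
+//     return __builtin_HEXAGON_S2_vsathb_nopack(rss);
+//   }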
+def HEXAGON_S2_vsathb_nopack: + di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>; +def HEXAGON_S2_vsathub_nopack: + di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>; +def HEXAGON_S2_vsatwh_nopack: + di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>; +def HEXAGON_S2_vsatwuh_nopack: + di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>; + +// STYPE / PERM / Vector shuffle. +def HEXAGON_S2_shuffeb: + di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>; +def HEXAGON_S2_shuffeh: + di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>; +def HEXAGON_S2_shuffob: + di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>; +def HEXAGON_S2_shuffoh: + di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>; + +// STYPE / PERM / Vector splat bytes. +def HEXAGON_S2_vsplatrb: + si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>; + +// STYPE / PERM / Vector splat halfwords. +def HEXAGON_S2_vsplatrh: + di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>; + +// STYPE / PERM / Vector splice. +def Hexagon_S2_vsplicerb: + di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>; +def Hexagon_S2_vspliceib: + di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>; + +// STYPE / PERM / Sign extend. +def HEXAGON_S2_vsxtbh: + di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>; +def HEXAGON_S2_vsxthw: + di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>; + +// STYPE / PERM / Truncate. +def HEXAGON_S2_vtrunehb: + si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>; +def HEXAGON_S2_vtrunohb: + si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>; +def HEXAGON_S2_vtrunewh: + di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>; +def HEXAGON_S2_vtrunowh: + di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>; + +// STYPE / PERM / Zero extend. +def HEXAGON_S2_vzxtbh: + di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>; +def HEXAGON_S2_vzxthw: + di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>; + + +/******************************************************************** +* STYPE/PRED * +*********************************************************************/ + +// STYPE / PRED / Mask generate from predicate. +def HEXAGON_C2_mask: + di_SInst_qi <"mask", int_hexagon_C2_mask>; + +// STYPE / PRED / Predicate transfer. +def HEXAGON_C2_tfrpr: + si_SInst_qi <"", int_hexagon_C2_tfrpr>; +def HEXAGON_C2_tfrrp: + qi_SInst_si <"", int_hexagon_C2_tfrrp>; + +// STYPE / PRED / Viterbi pack even and odd predicate bits. +def HEXAGON_C2_vitpack: + si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>; + + +/******************************************************************** +* STYPE/SHIFT * +*********************************************************************/ + +// STYPE / SHIFT / Shift by immediate. +def HEXAGON_S2_asl_i_r: + si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>; +def HEXAGON_S2_asr_i_r: + si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>; +def HEXAGON_S2_lsr_i_r: + si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>; +def HEXAGON_S2_asl_i_p: + di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>; +def HEXAGON_S2_asr_i_p: + di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>; +def HEXAGON_S2_lsr_i_p: + di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>; + +// STYPE / SHIFT / Shift by immediate and accumulate. 
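+// Editor's sketch of a shift-and-accumulate from the group below (same
+// __builtin_HEXAGON_* assumption; names/types illustrative):
+//   int shl_acc(int rx, int rs) {
+//     /* Rx += asl(Rs, #3) -- int_hexagon_S2_asl_i_r_acc */
+//     return __builtin_HEXAGON_S2_asl_i_r_acc(rx, rs, 3);
+//   }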
+def HEXAGON_S2_asl_i_r_acc: + si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>; +def HEXAGON_S2_asr_i_r_acc: + si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>; +def HEXAGON_S2_lsr_i_r_acc: + si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>; +def HEXAGON_S2_asl_i_r_nac: + si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>; +def HEXAGON_S2_asr_i_r_nac: + si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>; +def HEXAGON_S2_lsr_i_r_nac: + si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>; +def HEXAGON_S2_asl_i_p_acc: + di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>; +def HEXAGON_S2_asr_i_p_acc: + di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>; +def HEXAGON_S2_lsr_i_p_acc: + di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>; +def HEXAGON_S2_asl_i_p_nac: + di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>; +def HEXAGON_S2_asr_i_p_nac: + di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>; +def HEXAGON_S2_lsr_i_p_nac: + di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>; + +// STYPE / SHIFT / Shift by immediate and add. +def HEXAGON_S2_addasl_rrri: + si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>; + +// STYPE / SHIFT / Shift by immediate and logical. +def HEXAGON_S2_asl_i_r_and: + si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>; +def HEXAGON_S2_asr_i_r_and: + si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>; +def HEXAGON_S2_lsr_i_r_and: + si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>; + +def HEXAGON_S2_asl_i_r_xacc: + si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>; +def HEXAGON_S2_lsr_i_r_xacc: + si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>; + +def HEXAGON_S2_asl_i_r_or: + si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>; +def HEXAGON_S2_asr_i_r_or: + si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>; +def HEXAGON_S2_lsr_i_r_or: + si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>; + +def HEXAGON_S2_asl_i_p_and: + di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>; +def HEXAGON_S2_asr_i_p_and: + di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>; +def HEXAGON_S2_lsr_i_p_and: + di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>; + +def HEXAGON_S2_asl_i_p_xacc: + di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>; +def HEXAGON_S2_lsr_i_p_xacc: + di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>; + +def HEXAGON_S2_asl_i_p_or: + di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>; +def HEXAGON_S2_asr_i_p_or: + di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>; +def HEXAGON_S2_lsr_i_p_or: + di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>; + +// STYPE / SHIFT / Shift right by immediate with rounding. +def HEXAGON_S2_asr_i_r_rnd: + si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>; +def HEXAGON_S2_asr_i_r_rnd_goodsyntax: + si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>; + +// STYPE / SHIFT / Shift left by immediate with saturation. +def HEXAGON_S2_asl_i_r_sat: + si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>; + +// STYPE / SHIFT / Shift by register. 
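+// Editor's sketch contrasting the register shifts below: asr keeps the sign
+// bit, lsr shifts in zeros (same builtin assumption as above):
+//   int shr_both(int rs, int rt) {
+//     int a = __builtin_HEXAGON_S2_asr_r_r(rs, rt); /* Rd = asr(Rs, Rt) */
+//     int l = __builtin_HEXAGON_S2_lsr_r_r(rs, rt); /* Rd = lsr(Rs, Rt) */
+//     return a ^ l;
+//   }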
+def HEXAGON_S2_asl_r_r: + si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>; +def HEXAGON_S2_asr_r_r: + si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>; +def HEXAGON_S2_lsl_r_r: + si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>; +def HEXAGON_S2_lsr_r_r: + si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>; +def HEXAGON_S2_asl_r_p: + di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>; +def HEXAGON_S2_asr_r_p: + di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>; +def HEXAGON_S2_lsl_r_p: + di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>; +def HEXAGON_S2_lsr_r_p: + di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>; + +// STYPE / SHIFT / Shift by register and accumulate. +def HEXAGON_S2_asl_r_r_acc: + si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>; +def HEXAGON_S2_asr_r_r_acc: + si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>; +def HEXAGON_S2_lsl_r_r_acc: + si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>; +def HEXAGON_S2_lsr_r_r_acc: + si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>; +def HEXAGON_S2_asl_r_p_acc: + di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>; +def HEXAGON_S2_asr_r_p_acc: + di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>; +def HEXAGON_S2_lsl_r_p_acc: + di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>; +def HEXAGON_S2_lsr_r_p_acc: + di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>; + +def HEXAGON_S2_asl_r_r_nac: + si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>; +def HEXAGON_S2_asr_r_r_nac: + si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>; +def HEXAGON_S2_lsl_r_r_nac: + si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>; +def HEXAGON_S2_lsr_r_r_nac: + si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>; +def HEXAGON_S2_asl_r_p_nac: + di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>; +def HEXAGON_S2_asr_r_p_nac: + di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>; +def HEXAGON_S2_lsl_r_p_nac: + di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>; +def HEXAGON_S2_lsr_r_p_nac: + di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>; + +// STYPE / SHIFT / Shift by register and logical. 
+def HEXAGON_S2_asl_r_r_and: + si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>; +def HEXAGON_S2_asr_r_r_and: + si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>; +def HEXAGON_S2_lsl_r_r_and: + si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>; +def HEXAGON_S2_lsr_r_r_and: + si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>; + +def HEXAGON_S2_asl_r_r_or: + si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>; +def HEXAGON_S2_asr_r_r_or: + si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>; +def HEXAGON_S2_lsl_r_r_or: + si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>; +def HEXAGON_S2_lsr_r_r_or: + si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>; + +def HEXAGON_S2_asl_r_p_and: + di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>; +def HEXAGON_S2_asr_r_p_and: + di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>; +def HEXAGON_S2_lsl_r_p_and: + di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>; +def HEXAGON_S2_lsr_r_p_and: + di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>; + +def HEXAGON_S2_asl_r_p_or: + di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>; +def HEXAGON_S2_asr_r_p_or: + di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>; +def HEXAGON_S2_lsl_r_p_or: + di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>; +def HEXAGON_S2_lsr_r_p_or: + di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>; + +// STYPE / SHIFT / Shift by register with saturation. +def HEXAGON_S2_asl_r_r_sat: + si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>; +def HEXAGON_S2_asr_r_r_sat: + si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>; + +// STYPE / SHIFT / Table Index. +def Hexagon_S2_tableidxb_goodsyntax: + si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>; +def Hexagon_S2_tableidxd_goodsyntax: + si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>; +def Hexagon_S2_tableidxh_goodsyntax: + si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>; +def Hexagon_S2_tableidxw_goodsyntax: + si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>; + + +/******************************************************************** +* STYPE/VH * +*********************************************************************/ + +// STYPE / VH / Vector absolute value halfwords. +// Rdd64=vabsh(Rss64) +def HEXAGON_A2_vabsh: + di_SInst_di <"vabsh", int_hexagon_A2_vabsh>; +def HEXAGON_A2_vabshsat: + di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>; + +// STYPE / VH / Vector shift halfwords by immediate. +// Rdd64=v[asl/asr/lsr]h(Rss64,Rt32) +def HEXAGON_S2_asl_i_vh: + di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>; +def HEXAGON_S2_asr_i_vh: + di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>; +def HEXAGON_S2_lsr_i_vh: + di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>; + +// STYPE / VH / Vector shift halfwords by register. +// Rdd64=v[asl/asr/lsl/lsr]h(Rss64,Rt32) +def HEXAGON_S2_asl_r_vh: + di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>; +def HEXAGON_S2_asr_r_vh: + di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>; +def HEXAGON_S2_lsl_r_vh: + di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>; +def HEXAGON_S2_lsr_r_vh: + di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>; + + +/******************************************************************** +* STYPE/VW * +*********************************************************************/ + +// STYPE / VW / Vector absolute value words.
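+// Editor's sketch for the word-pair absolute value below: vabsw takes |x| of
+// each 32-bit lane (same builtin assumption as above):
+//   long long vabs_words(long long rss) {
+//     /* Rdd = vabsw(Rss) -- int_hexagon_A2_vabsw */
+//     return __builtin_HEXAGON_A2_vabsw(rss);
+//   }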
+def HEXAGON_A2_vabsw: + di_SInst_di <"vabsw", int_hexagon_A2_vabsw>; +def HEXAGON_A2_vabswsat: + di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>; + +// STYPE / VW / Vector shift words by immediate. +// Rdd64=v[asl/asr/lsr]w(Rss64,Rt32) +def HEXAGON_S2_asl_i_vw: + di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>; +def HEXAGON_S2_asr_i_vw: + di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>; +def HEXAGON_S2_lsr_i_vw: + di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>; + +// STYPE / VW / Vector shift words by register. +// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32) +def HEXAGON_S2_asl_r_vw: + di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>; +def HEXAGON_S2_asr_r_vw: + di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>; +def HEXAGON_S2_lsl_r_vw: + di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>; +def HEXAGON_S2_lsr_r_vw: + di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>; + +// STYPE / VW / Vector shift words with truncate and pack. +def HEXAGON_S2_asr_r_svw_trun: + si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>; +def HEXAGON_S2_asr_i_svw_trun: + si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>; + +// LD / Circular loads. +def HEXAGON_circ_ldd: + di_LDInstPI_diu4 <"circ_ldd", int_hexagon_circ_ldd>; + +include "HexagonIntrinsicsV3.td" +include "HexagonIntrinsicsV4.td" +include "HexagonIntrinsicsV5.td" diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td new file mode 100644 index 000000000000..2788101d5a66 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td @@ -0,0 +1,39 @@ +//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Multiply 64-bit and use lower result +// +// Optimized with intrinsic accumulates +// +def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2), + (i64 + (COMBINE_rr + (HEXAGON_M2_maci + (HEXAGON_M2_maci + (i32 + (EXTRACT_SUBREG + (i64 + (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), + subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)))), + subreg_hireg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg))), + (i32 + (EXTRACT_SUBREG + (i64 + (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)))), subreg_loreg))))>; + + + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td new file mode 100644 index 000000000000..2a54e62d20ae --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td @@ -0,0 +1,50 @@ +//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
+// +//===----------------------------------------------------------------------===// + + + + +// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary. +def Hexagon_M2_vrcmpys_s1: + di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>; +def Hexagon_M2_vrcmpys_acc_s1: + di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>; +def Hexagon_M2_vrcmpys_s1rp: + si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>; + + + + +/******************************************************************** +* MTYPE/VB * +*********************************************************************/ + +// MTYPE / VB / Vector reduce add unsigned halfwords. +def Hexagon_M2_vradduh: + si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>; + + +/******************************************************************** +* ALU64/ALU * +*********************************************************************/ + +// ALU64 / ALU / Add. +def Hexagon_A2_addsp: + di_ALU64_sidi <"add", int_hexagon_A2_addsp>; +def Hexagon_A2_addpsat: + di_ALU64_didi <"add", int_hexagon_A2_addpsat>; + +def Hexagon_A2_maxp: + di_ALU64_didi <"max", int_hexagon_A2_maxp>; +def Hexagon_A2_maxup: + di_ALU64_didi <"maxu", int_hexagon_A2_maxup>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td new file mode 100644 index 000000000000..dd28ebb57231 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td @@ -0,0 +1,369 @@ +//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is populated based on the following specs: +// Hexagon V4 Architecture Extensions +// Application-Level Specification +// 80-V9418-12 Rev. A +// June 15, 2010 + + +// +// ALU 32 types.
+// + +class si_ALU32_sisi_not<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_ALU32_s8si<string opc, Intrinsic IntID> + : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")), + [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>; + +class di_ALU32_sis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_neg_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_neg_ALU32_sis10<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_neg_ALU32_siu9<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_neg_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_neg_ALU32_sis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_sis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + + +// +// SInst Classes. 
+// +class qi_neg_SInst_qiqi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, and($src2, !$src3))")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, and($src2, $src3))")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, or($src2, !$src3))")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, or($src2, $src3))")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_SInst_si_addsis6<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, add($src2, #$src3))")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + imm:$src3))]>; + +class si_SInst_si_subs6si<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, sub(#$src2, $src3))")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, + IntRegs:$src3))]>; + +class di_ALU64_didi_neg<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class di_MInst_dididi_xacc<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_and<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_MInst_sisisi_andn<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst &= ", !strconcat(opc , "($src2, ~$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_SInst_sisis10_andi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, and($src2, #$src3))")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + imm:$src3))]>; + +class si_MInst_sisisi_xor<string opc, Intrinsic IntID> + :
MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_MInst_sisisi_xorn<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_SInst_sisis10_or<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3), + !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + imm:$src3))]>; + +class si_MInst_sisisi_or<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_MInst_sisisi_orn<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_SInst_siu5_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + + +/******************************************************************** +* ALU32/ALU * +*********************************************************************/ + +// ALU32 / ALU / Logical Operations. +def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>; +def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>; + + +/******************************************************************** +* ALU32/PERM * +*********************************************************************/ + +// ALU32 / PERM / Combine Words Into Doublewords. +def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>; +def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>; + + +/******************************************************************** +* ALU32/PRED * +*********************************************************************/ + +// ALU32 / PRED / Conditional Shift Halfword. +// ALU32 / PRED / Conditional Sign Extend. +// ALU32 / PRED / Conditional Zero Extend. +// ALU32 / PRED / Compare. +def Hexagon_C4_cmpneq : qi_neg_ALU32_sisi <"cmp.eq", int_hexagon_C4_cmpneq>; +def Hexagon_C4_cmpneqi : qi_neg_ALU32_sis10 <"cmp.eq", int_hexagon_C4_cmpneqi>; +def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>; +def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>; +def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>; +def Hexagon_C4_cmplteui: qi_neg_ALU32_siu9 <"cmp.gtu",int_hexagon_C4_cmplteui>; + +// ALU32 / PRED / Compare To General Register.
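+// Editor's sketch for the rcmp forms below, which return the 0/1 comparison
+// result in a general register rather than a predicate (same builtin
+// assumption as above; this builtin requires a V4 target):
+//   int eq_flag(int rs, int rt) {
+//     /* Rd = cmp.eq(Rs, Rt) -- int_hexagon_A4_rcmpeq */
+//     return __builtin_HEXAGON_A4_rcmpeq(rs, rt);
+//   }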
+def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>; +def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>; +def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>; +def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>; + + +/******************************************************************** +* CR * +*********************************************************************/ + +// CR / Corner Detection Acceleration. +def Hexagon_C4_fastcorner9: + qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>; +def Hexagon_C4_fastcorner9_not: + qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>; + +// CR / Logical Operations On Predicates. +def Hexagon_C4_and_andn: + qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>; +def Hexagon_C4_and_and: + qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>; +def Hexagon_C4_and_orn: + qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>; +def Hexagon_C4_and_or: + qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>; +def Hexagon_C4_or_andn: + qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>; +def Hexagon_C4_or_and: + qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>; +def Hexagon_C4_or_orn: + qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>; +def Hexagon_C4_or_or: + qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>; + + +/******************************************************************** +* XTYPE/ALU * +*********************************************************************/ + +// XTYPE / ALU / Add And Accumulate. +def Hexagon_S4_addaddi: + si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>; +def Hexagon_S4_subaddi: + si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>; + +// XTYPE / ALU / Logical Doublewords. +def Hexagon_S4_andnp: + di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>; +def Hexagon_S4_ornp: + di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>; + +// XTYPE / ALU / Logical-logical Doublewords. +def Hexagon_M4_xor_xacc: + di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>; + +// XTYPE / ALU / Logical-logical Words. +def HEXAGON_M4_and_and: + si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>; +def HEXAGON_M4_and_or: + si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>; +def HEXAGON_M4_and_xor: + si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>; +def HEXAGON_M4_and_andn: + si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>; +def HEXAGON_M4_xor_and: + si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>; +def HEXAGON_M4_xor_or: + si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>; +def HEXAGON_M4_xor_andn: + si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>; +def HEXAGON_M4_or_and: + si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>; +def HEXAGON_M4_or_or: + si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>; +def HEXAGON_M4_or_xor: + si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>; +def HEXAGON_M4_or_andn: + si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>; +def HEXAGON_S4_or_andix: + si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>; +def HEXAGON_S4_or_andi: + si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>; +def HEXAGON_S4_or_ori: + si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>; + +// XTYPE / ALU / Modulo wrap. +def HEXAGON_A4_modwrapu: + si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>; + +// XTYPE / ALU / Round. 
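+// Editor's sketch for the rounding ops below: round(Rs,#u5) rounds away the
+// low #u5 bits, and the :sat form clamps on overflow (same builtin assumption
+// as above; V4 target required):
+//   int round_q4(int rs) {
+//     /* Rd = round(Rs, #4):sat -- int_hexagon_A4_round_ri_sat */
+//     return __builtin_HEXAGON_A4_round_ri_sat(rs, 4);
+//   }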
+def HEXAGON_A4_cround_ri:
+  si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>;
+def HEXAGON_A4_cround_rr:
+  si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>;
+def HEXAGON_A4_round_ri:
+  si_SInst_siu5 <"round", int_hexagon_A4_round_ri>;
+def HEXAGON_A4_round_rr:
+  si_SInst_sisi <"round", int_hexagon_A4_round_rr>;
+def HEXAGON_A4_round_ri_sat:
+  si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>;
+def HEXAGON_A4_round_rr_sat:
+  si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>;
+
+// XTYPE / ALU / Vector reduce add unsigned halfwords.
+// XTYPE / ALU / Vector add bytes.
+// XTYPE / ALU / Vector conditional negate.
+// XTYPE / ALU / Vector maximum bytes.
+// XTYPE / ALU / Vector reduce maximum halfwords.
+// XTYPE / ALU / Vector reduce maximum words.
+// XTYPE / ALU / Vector minimum bytes.
+// XTYPE / ALU / Vector reduce minimum halfwords.
+// XTYPE / ALU / Vector reduce minimum words.
+// XTYPE / ALU / Vector subtract bytes.
+
+
+/********************************************************************
+*            XTYPE/BIT                                              *
+*********************************************************************/
+
+// XTYPE / BIT / Count leading.
+// XTYPE / BIT / Count trailing.
+// XTYPE / BIT / Extract bitfield.
+// XTYPE / BIT / Masked parity.
+// XTYPE / BIT / Bit reverse.
+// XTYPE / BIT / Split bitfield.
+
+
+/********************************************************************
+*            XTYPE/COMPLEX                                          *
+*********************************************************************/
+
+// XTYPE / COMPLEX / Complex add/sub halfwords.
+// XTYPE / COMPLEX / Complex add/sub words.
+// XTYPE / COMPLEX / Complex multiply 32x16.
+// XTYPE / COMPLEX / Vector reduce complex rotate.
+
+
+/********************************************************************
+*            XTYPE/MPY                                              *
+*********************************************************************/
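The def records above only bind each int_hexagon_* intrinsic to an
instruction pattern; a front end reaches them as llvm.hexagon.* intrinsic
calls. A minimal sketch of emitting the accumulating M4_or_and operation
with the LLVM-3.x-era IRBuilder API follows; the helper name and
variables are illustrative, not part of this patch, and the accumulator
is passed as the first operand to match the si_MInst_sisisi_* patterns:

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"

  using namespace llvm;

  // Emit acc |= and(x, y) as a call to the M4_or_and intrinsic; the
  // TableGen pattern ties the accumulator operand back to $dst.
  static Value *emitOrAnd(IRBuilder<> &B, Module *M,
                          Value *Acc, Value *X, Value *Y) {
    Function *F = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_or_and);
    return B.CreateCall3(F, Acc, X, Y);
  }

Clang exposes the same operation as a builtin (presumably
__builtin_HEXAGON_M4_or_and, following its usual naming scheme) that
lowers to this intrinsic.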
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td new file mode 100644 index 000000000000..1d44b526d298 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td @@ -0,0 +1,395 @@ +class sf_SInst_sf<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1))]>; + +class si_SInst_sf<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1))]>; + +class sf_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1))]>; + +class sf_SInst_di<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>; + +class sf_SInst_df<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>; + +class si_SInst_df<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>; + +class df_SInst_sf<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; + +class di_SInst_sf<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; + +class df_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; + +class df_SInst_df<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>; + +class di_SInst_df<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>; + + +class df_SInst_di<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>; + +class sf_MInst_sfsf<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class df_MInst_dfdf<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class qi_ALU64_dfdf<string opc, Intrinsic IntID> + : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID DoubleRegs:$src1, 
DoubleRegs:$src2))]>; + +class qi_ALU64_dfu5<string opc, Intrinsic IntID> + : ALU64_ri<(outs PredRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + + +class sf_MInst_sfsfsf_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$dst2), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, + IntRegs:$src2, IntRegs:$dst2))], + "$dst2 = $dst">; + +class sf_MInst_sfsfsf_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$dst2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, + IntRegs:$src2, IntRegs:$dst2))], + "$dst2 = $dst">; + + +class sf_MInst_sfsfsfsi_sc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2, IntRegs:$src3), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2, $src3):scale")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2, IntRegs:$src3))], + "$dst2 = $dst">; + +class sf_MInst_sfsfsf_acc_lib<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$dst2), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2):lib")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, + IntRegs:$src2, IntRegs:$dst2))], + "$dst2 = $dst">; + +class sf_MInst_sfsfsf_nac_lib<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$dst2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2):lib")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, + IntRegs:$src2, IntRegs:$dst2))], + "$dst2 = $dst">; + +class df_MInst_dfdfdf_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$dst2), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2, DoubleRegs:$dst2))], + "$dst2 = $dst">; + +class df_MInst_dfdfdf_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$dst2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2, DoubleRegs:$dst2))], + "$dst2 = $dst">; + + +class df_MInst_dfdfdfsi_sc<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2, IntRegs:$src3), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2, $src3):scale")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2, IntRegs:$src3))], + "$dst2 = $dst">; + +class df_MInst_dfdfdf_acc_lib<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$dst2), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2):lib")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2, DoubleRegs:$dst2))], + "$dst2 = $dst">; + +class df_MInst_dfdfdf_nac_lib<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$dst2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2):lib")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + 
DoubleRegs:$src2, DoubleRegs:$dst2))], + "$dst2 = $dst">; + +class qi_SInst_sfsf<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_sfu5<string opc, Intrinsic IntID> + : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class sf_ALU64_u10_pos<string opc, Intrinsic IntID> + : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")), + [(set IntRegs:$dst, (IntID imm:$src1))]>; + +class sf_ALU64_u10_neg<string opc, Intrinsic IntID> + : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")), + [(set IntRegs:$dst, (IntID imm:$src1))]>; + +class df_ALU64_u10_pos<string opc, Intrinsic IntID> + : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")), + [(set DoubleRegs:$dst, (IntID imm:$src1))]>; + +class df_ALU64_u10_neg<string opc, Intrinsic IntID> + : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")), + [(set DoubleRegs:$dst, (IntID imm:$src1))]>; + +class di_MInst_diu6<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + +class di_MInst_diu4_rnd<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + +class si_MInst_diu4_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd:sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + +class si_SInst_diu4_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + + +def HEXAGON_C4_fastcorner9: + qi_SInst_qiqi <"fastcorner9", int_hexagon_C4_fastcorner9>; +def HEXAGON_C4_fastcorner9_not: + qi_SInst_qiqi <"!fastcorner9", int_hexagon_C4_fastcorner9_not>; +def HEXAGON_M5_vrmpybuu: + di_MInst_didi <"vrmpybu", int_hexagon_M5_vrmpybuu>; +def HEXAGON_M5_vrmacbuu: + di_MInst_dididi_acc <"vrmpybu", int_hexagon_M5_vrmacbuu>; +def HEXAGON_M5_vrmpybsu: + di_MInst_didi <"vrmpybsu", int_hexagon_M5_vrmpybsu>; +def HEXAGON_M5_vrmacbsu: + di_MInst_dididi_acc <"vrmpybsu", int_hexagon_M5_vrmacbsu>; +def HEXAGON_M5_vmpybuu: + di_MInst_sisi <"vmpybu", int_hexagon_M5_vmpybuu>; +def HEXAGON_M5_vmpybsu: + di_MInst_sisi <"vmpybsu", int_hexagon_M5_vmpybsu>; +def HEXAGON_M5_vmacbuu: + di_MInst_disisi_acc <"vmpybu", int_hexagon_M5_vmacbuu>; +def HEXAGON_M5_vmacbsu: + di_MInst_disisi_acc <"vmpybsu", int_hexagon_M5_vmacbsu>; +def HEXAGON_M5_vdmpybsu: + di_MInst_didi_sat <"vdmpybsu", int_hexagon_M5_vdmpybsu>; +def HEXAGON_M5_vdmacbsu: + di_MInst_dididi_acc_sat <"vdmpybsu", int_hexagon_M5_vdmacbsu>; +def HEXAGON_A5_vaddhubs: + si_SInst_didi_sat <"vaddhub", int_hexagon_A5_vaddhubs>; +def HEXAGON_S5_popcountp: 
+ si_SInst_di <"popcount", int_hexagon_S5_popcountp>; +def HEXAGON_S5_asrhub_rnd_sat_goodsyntax: + si_MInst_diu4_rnd_sat <"vasrhub", int_hexagon_S5_asrhub_rnd_sat_goodsyntax>; +def HEXAGON_S5_asrhub_sat: + si_SInst_diu4_sat <"vasrhub", int_hexagon_S5_asrhub_sat>; +def HEXAGON_S5_vasrhrnd_goodsyntax: + di_MInst_diu4_rnd <"vasrh", int_hexagon_S5_vasrhrnd_goodsyntax>; +def HEXAGON_S2_asr_i_p_rnd: + di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p_rnd>; +def HEXAGON_S2_asr_i_p_rnd_goodsyntax: + di_MInst_diu6 <"asrrnd", int_hexagon_S2_asr_i_p_rnd_goodsyntax>; +def HEXAGON_F2_sfadd: + sf_MInst_sfsf <"sfadd", int_hexagon_F2_sfadd>; +def HEXAGON_F2_sfsub: + sf_MInst_sfsf <"sfsub", int_hexagon_F2_sfsub>; +def HEXAGON_F2_sfmpy: + sf_MInst_sfsf <"sfmpy", int_hexagon_F2_sfmpy>; +def HEXAGON_F2_sffma: + sf_MInst_sfsfsf_acc <"sfmpy", int_hexagon_F2_sffma>; +def HEXAGON_F2_sffma_sc: + sf_MInst_sfsfsfsi_sc <"sfmpy", int_hexagon_F2_sffma_sc>; +def HEXAGON_F2_sffms: + sf_MInst_sfsfsf_nac <"sfmpy", int_hexagon_F2_sffms>; +def HEXAGON_F2_sffma_lib: + sf_MInst_sfsfsf_acc_lib <"sfmpy", int_hexagon_F2_sffma_lib>; +def HEXAGON_F2_sffms_lib: + sf_MInst_sfsfsf_nac_lib <"sfmpy", int_hexagon_F2_sffms_lib>; +def HEXAGON_F2_sfcmpeq: + qi_SInst_sfsf <"sfcmp.eq", int_hexagon_F2_sfcmpeq>; +def HEXAGON_F2_sfcmpgt: + qi_SInst_sfsf <"sfcmp.gt", int_hexagon_F2_sfcmpgt>; +def HEXAGON_F2_sfcmpge: + qi_SInst_sfsf <"sfcmp.ge", int_hexagon_F2_sfcmpge>; +def HEXAGON_F2_sfcmpuo: + qi_SInst_sfsf <"sfcmp.uo", int_hexagon_F2_sfcmpuo>; +def HEXAGON_F2_sfmax: + sf_MInst_sfsf <"sfmax", int_hexagon_F2_sfmax>; +def HEXAGON_F2_sfmin: + sf_MInst_sfsf <"sfmin", int_hexagon_F2_sfmin>; +def HEXAGON_F2_sfclass: + qi_SInst_sfu5 <"sfclass", int_hexagon_F2_sfclass>; +def HEXAGON_F2_sfimm_p: + sf_ALU64_u10_pos <"sfmake", int_hexagon_F2_sfimm_p>; +def HEXAGON_F2_sfimm_n: + sf_ALU64_u10_neg <"sfmake", int_hexagon_F2_sfimm_n>; +def HEXAGON_F2_sffixupn: + sf_MInst_sfsf <"sffixupn", int_hexagon_F2_sffixupn>; +def HEXAGON_F2_sffixupd: + sf_MInst_sfsf <"sffixupd", int_hexagon_F2_sffixupd>; +def HEXAGON_F2_sffixupr: + sf_SInst_sf <"sffixupr", int_hexagon_F2_sffixupr>; +def HEXAGON_F2_dfadd: + df_MInst_dfdf <"dfadd", int_hexagon_F2_dfadd>; +def HEXAGON_F2_dfsub: + df_MInst_dfdf <"dfsub", int_hexagon_F2_dfsub>; +def HEXAGON_F2_dfmpy: + df_MInst_dfdf <"dfmpy", int_hexagon_F2_dfmpy>; +def HEXAGON_F2_dffma: + df_MInst_dfdfdf_acc <"dfmpy", int_hexagon_F2_dffma>; +def HEXAGON_F2_dffms: + df_MInst_dfdfdf_nac <"dfmpy", int_hexagon_F2_dffms>; +def HEXAGON_F2_dffma_lib: + df_MInst_dfdfdf_acc_lib <"dfmpy", int_hexagon_F2_dffma_lib>; +def HEXAGON_F2_dffms_lib: + df_MInst_dfdfdf_nac_lib <"dfmpy", int_hexagon_F2_dffms_lib>; +def HEXAGON_F2_dffma_sc: + df_MInst_dfdfdfsi_sc <"dfmpy", int_hexagon_F2_dffma_sc>; +def HEXAGON_F2_dfmax: + df_MInst_dfdf <"dfmax", int_hexagon_F2_dfmax>; +def HEXAGON_F2_dfmin: + df_MInst_dfdf <"dfmin", int_hexagon_F2_dfmin>; +def HEXAGON_F2_dfcmpeq: + qi_ALU64_dfdf <"dfcmp.eq", int_hexagon_F2_dfcmpeq>; +def HEXAGON_F2_dfcmpgt: + qi_ALU64_dfdf <"dfcmp.gt", int_hexagon_F2_dfcmpgt>; +def HEXAGON_F2_dfcmpge: + qi_ALU64_dfdf <"dfcmp.ge", int_hexagon_F2_dfcmpge>; +def HEXAGON_F2_dfcmpuo: + qi_ALU64_dfdf <"dfcmp.uo", int_hexagon_F2_dfcmpuo>; +def HEXAGON_F2_dfclass: + qi_ALU64_dfu5 <"dfclass", int_hexagon_F2_dfclass>; +def HEXAGON_F2_dfimm_p: + df_ALU64_u10_pos <"dfmake", int_hexagon_F2_dfimm_p>; +def HEXAGON_F2_dfimm_n: + df_ALU64_u10_neg <"dfmake", int_hexagon_F2_dfimm_n>; +def HEXAGON_F2_dffixupn: + df_MInst_dfdf <"dffixupn", int_hexagon_F2_dffixupn>; +def 
HEXAGON_F2_dffixupd: + df_MInst_dfdf <"dffixupd", int_hexagon_F2_dffixupd>; +def HEXAGON_F2_dffixupr: + df_SInst_df <"dffixupr", int_hexagon_F2_dffixupr>; +def HEXAGON_F2_conv_sf2df: + df_SInst_sf <"convert_sf2df", int_hexagon_F2_conv_sf2df>; +def HEXAGON_F2_conv_df2sf: + sf_SInst_df <"convert_df2sf", int_hexagon_F2_conv_df2sf>; +def HEXAGON_F2_conv_uw2sf: + sf_SInst_si <"convert_uw2sf", int_hexagon_F2_conv_uw2sf>; +def HEXAGON_F2_conv_uw2df: + df_SInst_si <"convert_uw2df", int_hexagon_F2_conv_uw2df>; +def HEXAGON_F2_conv_w2sf: + sf_SInst_si <"convert_w2sf", int_hexagon_F2_conv_w2sf>; +def HEXAGON_F2_conv_w2df: + df_SInst_si <"convert_w2df", int_hexagon_F2_conv_w2df>; +def HEXAGON_F2_conv_ud2sf: + sf_SInst_di <"convert_ud2sf", int_hexagon_F2_conv_ud2sf>; +def HEXAGON_F2_conv_ud2df: + df_SInst_di <"convert_ud2df", int_hexagon_F2_conv_ud2df>; +def HEXAGON_F2_conv_d2sf: + sf_SInst_di <"convert_d2sf", int_hexagon_F2_conv_d2sf>; +def HEXAGON_F2_conv_d2df: + df_SInst_di <"convert_d2df", int_hexagon_F2_conv_d2df>; +def HEXAGON_F2_conv_sf2uw: + si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw>; +def HEXAGON_F2_conv_sf2w: + si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w>; +def HEXAGON_F2_conv_sf2ud: + di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud>; +def HEXAGON_F2_conv_sf2d: + di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d>; +def HEXAGON_F2_conv_df2uw: + si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw>; +def HEXAGON_F2_conv_df2w: + si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w>; +def HEXAGON_F2_conv_df2ud: + di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud>; +def HEXAGON_F2_conv_df2d: + di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d>; +def HEXAGON_F2_conv_sf2uw_chop: + si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw_chop>; +def HEXAGON_F2_conv_sf2w_chop: + si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w_chop>; +def HEXAGON_F2_conv_sf2ud_chop: + di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud_chop>; +def HEXAGON_F2_conv_sf2d_chop: + di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d_chop>; +def HEXAGON_F2_conv_df2uw_chop: + si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw_chop>; +def HEXAGON_F2_conv_df2w_chop: + si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w_chop>; +def HEXAGON_F2_conv_df2ud_chop: + di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud_chop>; +def HEXAGON_F2_conv_df2d_chop: + di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d_chop>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp new file mode 100644 index 000000000000..bbb2fa4c42de --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -0,0 +1,95 @@ +//===- HexagonMCInstLower.cpp - Convert Hexagon MachineInstr to an MCInst -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower Hexagon MachineInstrs to their corresponding +// MCInst records. 
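// Editorial sketch (not part of the original header): a register-only
// MachineInstr, e.g. an add of %R1 and %R2 into %R0, lowers to an MCInst
// with the same opcode and three register MCOperands; global-address and
// jump-table operands become MCSymbolRefExprs, with any constant offset
// folded in by GetSymbolRef() below.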
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonAsmPrinter.h" +#include "HexagonMachineFunctionInfo.h" +#include "MCTargetDesc/HexagonMCInst.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol, + HexagonAsmPrinter& Printer) { + MCContext &MC = Printer.OutContext; + const MCExpr *ME; + + ME = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, MC); + + if (!MO.isJTI() && MO.getOffset()) + ME = MCBinaryExpr::CreateAdd(ME, MCConstantExpr::Create(MO.getOffset(), MC), + MC); + + return (MCOperand::CreateExpr(ME)); +} + +// Create an MCInst from a MachineInstr +void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI, + HexagonAsmPrinter& AP) { + MCI.setOpcode(MI->getOpcode()); + MCI.setDesc(MI->getDesc()); + + for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCO; + + switch (MO.getType()) { + default: + MI->dump(); + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) continue; + MCO = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_FPImmediate: { + APFloat Val = MO.getFPImm()->getValueAPF(); + // FP immediates are used only when setting GPRs, so they may be dealt + // with like regular immediates from this point on. + MCO = MCOperand::CreateImm(*Val.bitcastToAPInt().getRawData()); + break; + } + case MachineOperand::MO_Immediate: + MCO = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCO = MCOperand::CreateExpr + (MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), + AP.OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + MCO = GetSymbolRef(MO, AP.getSymbol(MO.getGlobal()), AP); + break; + case MachineOperand::MO_ExternalSymbol: + MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), + AP); + break; + case MachineOperand::MO_JumpTableIndex: + MCO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP); + break; + case MachineOperand::MO_BlockAddress: + MCO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP); + break; + } + + MCI.addOperand(MCO); + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp new file mode 100644 index 000000000000..9579c8b6df16 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp @@ -0,0 +1,16 @@ +//= HexagonMachineFunctionInfo.cpp - Hexagon machine function info *- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonMachineFunctionInfo.h" + +using namespace llvm; + +// pin vtable to this file +void HexagonMachineFunctionInfo::anchor() {} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h new file mode 100644 index 000000000000..a59c8c9dd2a2 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -0,0 +1,80 @@ +//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef HexagonMACHINEFUNCTIONINFO_H +#define HexagonMACHINEFUNCTIONINFO_H + +#include <map> +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + + namespace Hexagon { + const unsigned int StartPacket = 0x1; + const unsigned int EndPacket = 0x2; + } + + +/// Hexagon target-specific information for each MachineFunction. +class HexagonMachineFunctionInfo : public MachineFunctionInfo { + // SRetReturnReg - Some subtargets require that sret lowering includes + // returning the value of the returned struct in a register. This field + // holds the virtual register into which the sret argument is passed. + unsigned SRetReturnReg; + std::vector<MachineInstr*> AllocaAdjustInsts; + int VarArgsFrameIndex; + bool HasClobberLR; + bool HasEHReturn; + std::map<const MachineInstr*, unsigned> PacketInfo; + virtual void anchor(); + +public: + HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0), + HasEHReturn(false) {} + + HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), + HasClobberLR(0), + HasEHReturn(false) {} + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + void addAllocaAdjustInst(MachineInstr* MI) { + AllocaAdjustInsts.push_back(MI); + } + const std::vector<MachineInstr*>& getAllocaAdjustInsts() { + return AllocaAdjustInsts; + } + + void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; } + int getVarArgsFrameIndex() { return VarArgsFrameIndex; } + + void setStartPacket(MachineInstr* MI) { + PacketInfo[MI] |= Hexagon::StartPacket; + } + void setEndPacket(MachineInstr* MI) { + PacketInfo[MI] |= Hexagon::EndPacket; + } + bool isStartPacket(const MachineInstr* MI) const { + return (PacketInfo.count(MI) && + (PacketInfo.find(MI)->second & Hexagon::StartPacket)); + } + bool isEndPacket(const MachineInstr* MI) const { + return (PacketInfo.count(MI) && + (PacketInfo.find(MI)->second & Hexagon::EndPacket)); + } + void setHasClobberLR(bool v) { HasClobberLR = v; } + bool hasClobberLR() const { return HasClobberLR; } + + bool hasEHReturn() const { return HasEHReturn; }; + void setHasEHReturn(bool H = true) { HasEHReturn = H; }; +}; +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp new file mode 100644 index 000000000000..c94f081ab13b --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -0,0 +1,691 @@ +//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineScheduler schedules machine instructions after phi elimination. It +// preserves LiveIntervals so it can be invoked before register allocation. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "misched" + +#include "HexagonMachineScheduler.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/IR/Function.h" + +using namespace llvm; + +/// Platform specific modifications to DAG. +void VLIWMachineScheduler::postprocessDAG() { + SUnit* LastSequentialCall = NULL; + // Currently we only catch the situation when compare gets scheduled + // before preceding call. + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + // Remember the call. + if (SUnits[su].getInstr()->isCall()) + LastSequentialCall = &(SUnits[su]); + // Look for a compare that defines a predicate. + else if (SUnits[su].getInstr()->isCompare() && LastSequentialCall) + SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); + } +} + +/// Check if scheduling of this SU is possible +/// in the current packet. +/// It is _not_ precise (statefull), it is more like +/// another heuristic. Many corner cases are figured +/// empirically. +bool VLIWResourceModel::isResourceAvailable(SUnit *SU) { + if (!SU || !SU->getInstr()) + return false; + + // First see if the pipeline could receive this instruction + // in the current cycle. + switch (SU->getInstr()->getOpcode()) { + default: + if (!ResourcesModel->canReserveResources(SU->getInstr())) + return false; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + break; + } + + // Now see if there are no other dependencies to instructions already + // in the packet. + for (unsigned i = 0, e = Packet.size(); i != e; ++i) { + if (Packet[i]->Succs.size() == 0) + continue; + for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), + E = Packet[i]->Succs.end(); I != E; ++I) { + // Since we do not add pseudos to packets, might as well + // ignore order dependencies. + if (I->isCtrl()) + continue; + + if (I->getSUnit() == SU) + return false; + } + } + return true; +} + +/// Keep track of available resources. +bool VLIWResourceModel::reserveResources(SUnit *SU) { + bool startNewCycle = false; + // Artificially reset state. + if (!SU) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + return false; + } + // If this SU does not fit in the packet + // start a new one. 
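// Editorial note: two events close the current packet below -- either the
// DFA cannot accept the incoming SU (flush, then retry in a fresh packet),
// or the packet reaches the machine's issue width. Worked example
// (hedged): with an issue width of 4 and three SUs already reserved, a
// fourth SU that needs an occupied functional unit forces
// clearResources()/Packet.clear() and is then placed in a new cycle.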
+ if (!isResourceAvailable(SU)) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + startNewCycle = true; + } + + switch (SU->getInstr()->getOpcode()) { + default: + ResourcesModel->reserveResources(SU->getInstr()); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + break; + } + Packet.push_back(SU); + +#ifndef NDEBUG + DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); + for (unsigned i = 0, e = Packet.size(); i != e; ++i) { + DEBUG(dbgs() << "\t[" << i << "] SU("); + DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); + DEBUG(Packet[i]->getInstr()->dump()); + } +#endif + + // If packet is now full, reset the state so in the next cycle + // we start fresh. + if (Packet.size() >= SchedModel->getIssueWidth()) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + startNewCycle = true; + } + + return startNewCycle; +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. [RegionBegin, RegionEnd) +/// only includes instructions that have DAG nodes, not scheduling boundaries. +void VLIWMachineScheduler::schedule() { + DEBUG(dbgs() + << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber() + << " " << BB->getName() + << " in_func " << BB->getParent()->getFunction()->getName() + << " at loop depth " << MLI.getLoopDepth(BB) + << " \n"); + + buildDAGWithRegPressure(); + + // Postprocess the DAG to add platform specific artificial dependencies. + postprocessDAG(); + + SmallVector<SUnit*, 8> TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. + SchedImpl->initialize(this); + + // To view Height/Depth correctly, they should be accessed at least once. + // + // FIXME: SUnit::dumpAll always recompute depth and height now. The max + // depth/height could be computed directly from the roots and leaves. + DEBUG(unsigned maxH = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + if (SUnits[su].getHeight() > maxH) + maxH = SUnits[su].getHeight(); + dbgs() << "Max Height " << maxH << "\n";); + DEBUG(unsigned maxD = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + if (SUnits[su].getDepth() > maxD) + maxD = SUnits[su].getDepth(); + dbgs() << "Max Depth " << maxD << "\n";); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + initQueues(TopRoots, BotRoots); + + bool IsTopNode = false; + while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + if (!checkSchedLimit()) + break; + + scheduleMI(SU, IsTopNode); + + updateQueues(SU, IsTopNode); + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); +} + +void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { + DAG = static_cast<VLIWMachineScheduler*>(dag); + SchedModel = DAG->getSchedModel(); + + Top.init(DAG, SchedModel); + Bot.init(DAG, SchedModel); + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. 
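// Editorial note: initialize() may run once per scheduling region, so the
// old recognizers and resource models are deleted before fresh ones are
// created below; SchedBoundary's destructor (see the header) releases the
// final pair.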
+ const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); + const TargetMachine &TM = DAG->MF.getTarget(); + delete Top.HazardRec; + delete Bot.HazardRec; + Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + + delete Top.ResourceModel; + delete Bot.ResourceModel; + Top.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel()); + Bot.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel()); + + assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); +} + +void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { + if (SU->isScheduled) + return; + + for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; + unsigned MinLatency = I->getLatency(); +#ifndef NDEBUG + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); +#endif + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; + } + Top.releaseNode(SU, SU->TopReadyCycle); +} + +void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { + if (SU->isScheduled) + return; + + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; + unsigned MinLatency = I->getLatency(); +#ifndef NDEBUG + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); +#endif + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; + } + Bot.releaseNode(SU, SU->BotReadyCycle); +} + +/// Does this SU have a hazard within the current instruction group. +/// +/// The scheduler supports two modes of hazard recognition. The first is the +/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that +/// supports highly complicated in-order reservation tables +/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. +/// +/// The second is a streamlined mechanism that checks for hazards based on +/// simple counters that the scheduler itself maintains. It explicitly checks +/// for instruction dispatch limitations, including the number of micro-ops that +/// can dispatch per cycle. +/// +/// TODO: Also check whether the SU must start a new group. +bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled()) + return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; + + unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); + if (IssueCount + uops > SchedModel->getIssueWidth()) + return true; + + return false; +} + +void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU, + unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + // Check for interlocks first. For the purpose of other heuristics, an + // instruction that cannot issue appears as if it's not in the ReadyQueue. + if (ReadyCycle > CurrCycle || checkHazard(SU)) + + Pending.push(SU); + else + Available.push(SU); +} + +/// Move the boundary of scheduled code by one cycle. +void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() { + unsigned Width = SchedModel->getIssueWidth(); + IssueCount = (IssueCount <= Width) ? 
0 : IssueCount - Width; + + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); + + if (!HazardRec->isEnabled()) { + // Bypass HazardRec virtual calls. + CurrCycle = NextCycle; + } else { + // Bypass getHazardType calls in case of long latency. + for (; CurrCycle != NextCycle; ++CurrCycle) { + if (isTop()) + HazardRec->AdvanceCycle(); + else + HazardRec->RecedeCycle(); + } + } + CheckPending = true; + + DEBUG(dbgs() << "*** " << Available.getName() << " cycle " + << CurrCycle << '\n'); +} + +/// Move the boundary of scheduled code by one SUnit. +void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) { + bool startNewCycle = false; + + // Update the reservation table. + if (HazardRec->isEnabled()) { + if (!isTop() && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + HazardRec->EmitInstruction(SU); + } + + // Update DFA model. + startNewCycle = ResourceModel->reserveResources(SU); + + // Check the instruction group dispatch limit. + // TODO: Check if this SU must end a dispatch group. + IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); + if (startNewCycle) { + DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); + bumpCycle(); + } + else + DEBUG(dbgs() << "*** IssueCount " << IssueCount + << " at cycle " << CurrCycle << '\n'); +} + +/// Release pending ready nodes in to the available queue. This makes them +/// visible to heuristics. +void ConvergingVLIWScheduler::SchedBoundary::releasePending() { + // If the available queue is empty, it is safe to reset MinReadyCycle. + if (Available.empty()) + MinReadyCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = Pending.size(); i != e; ++i) { + SUnit *SU = *(Pending.begin()+i); + unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; + + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + if (ReadyCycle > CurrCycle) + continue; + + if (checkHazard(SU)) + continue; + + Available.push(SU); + Pending.remove(Pending.begin()+i); + --i; --e; + } + CheckPending = false; +} + +/// Remove SU from the ready set for this boundary. +void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) { + if (Available.isInQueue(SU)) + Available.remove(Available.find(SU)); + else { + assert(Pending.isInQueue(SU) && "bad ready count"); + Pending.remove(Pending.find(SU)); + } +} + +/// If this queue only has one ready candidate, return it. As a side effect, +/// advance the cycle until at least one node is ready. If multiple instructions +/// are ready, return NULL. 
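/// Editorial note: the loop below advances cycles while Available is
/// empty, releasing pending nodes each time; the assert bounds the wait by
/// the hazard lookahead plus the largest min-latency, so an empty queue
/// cannot spin forever on a permanent hazard.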
+SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() { + if (CheckPending) + releasePending(); + + for (unsigned i = 0; Available.empty(); ++i) { + assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && + "permanent hazard"); (void)i; + ResourceModel->reserveResources(0); + bumpCycle(); + releasePending(); + } + if (Available.size() == 1) + return *Available.begin(); + return NULL; +} + +#ifndef NDEBUG +void ConvergingVLIWScheduler::traceCandidate(const char *Label, + const ReadyQueue &Q, + SUnit *SU, PressureChange P) { + dbgs() << Label << " " << Q.getName() << " "; + if (P.isValid()) + dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" + << P.getUnitInc() << " "; + else + dbgs() << " "; + SU->dump(DAG); +} +#endif + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +static SUnit *getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + return OnlyAvailablePred; +} + +/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor +/// of SU, return it, otherwise return null. +static SUnit *getSingleUnscheduledSucc(SUnit *SU) { + SUnit *OnlyAvailableSucc = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + SUnit &Succ = *I->getSUnit(); + if (!Succ.isScheduled) { + // We found an available, but not scheduled, successor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ) + return 0; + OnlyAvailableSucc = &Succ; + } + } + return OnlyAvailableSucc; +} + +// Constants used to denote relative importance of +// heuristic components for cost computation. +static const unsigned PriorityOne = 200; +static const unsigned PriorityTwo = 50; +static const unsigned ScaleTwo = 10; +static const unsigned FactorOne = 2; + +/// Single point to compute overall scheduling cost. +/// TODO: More heuristics will be used soon. +int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, + SchedCandidate &Candidate, + RegPressureDelta &Delta, + bool verbose) { + // Initial trivial priority. + int ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (!SU || SU->isScheduled) + return ResCount; + + // Forced priority is high. + if (SU->isScheduleHigh) + ResCount += PriorityOne; + + // Critical path first. + if (Q.getID() == TopQID) { + ResCount += (SU->getHeight() * ScaleTwo); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Top.ResourceModel->isResourceAvailable(SU)) + ResCount <<= FactorOne; + } else { + ResCount += (SU->getDepth() * ScaleTwo); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Bot.ResourceModel->isResourceAvailable(SU)) + ResCount <<= FactorOne; + } + + unsigned NumNodesBlocking = 0; + if (Q.getID() == TopQID) { + // How many SUs does it block from scheduling? + // Look at all of the successors of this node. + // Count the number of nodes that + // this node is the sole unscheduled node for. 
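    // Editorial illustration (hedged): if SU is the only unscheduled
    // predecessor of three of its successors, NumNodesBlocking becomes 3
    // and the cost below rises by 3 * ScaleTwo = 30, favoring nodes that
    // unblock the most work.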
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + } else { + // How many unscheduled predecessors block this node? + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (getSingleUnscheduledSucc(I->getSUnit()) == SU) + ++NumNodesBlocking; + } + ResCount += (NumNodesBlocking * ScaleTwo); + + // Factor in reg pressure as a heuristic. + ResCount -= (Delta.Excess.getUnitInc()*PriorityTwo); + ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityTwo); + + DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")"); + + return ResCount; +} + +/// Pick the best candidate from the top queue. +/// +/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during +/// DAG building. To adjust for the current scheduling location we need to +/// maintain the number of vreg uses remaining to be top-scheduled. +ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: +pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, + SchedCandidate &Candidate) { + DEBUG(Q.dump()); + + // getMaxPressureDelta temporarily modifies the tracker. + RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); + + // BestSU remains NULL if no top candidates beat the best existing candidate. + CandResult FoundCandidate = NoCand; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + + int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); + + // Initialize the candidate if needed. + if (!Candidate.SU) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + continue; + } + + // Best cost. + if (CurrentCost > Candidate.SCost) { + DEBUG(traceCandidate("CCAND", Q, *I)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + continue; + } + + // Fall through to original instruction order. + // Only consider node order if Candidate was chosen from this Q. + if (FoundCandidate == NoCand) + continue; + } + return FoundCandidate; +} + +/// Pick the best candidate node from either the top or bottom queue. +SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + return SU; + } + SchedCandidate BotCand; + // Prefer bottom scheduling when heuristics are silent. + CandResult BotResult = pickNodeFromQueue(Bot.Available, + DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + + // If either Q has a single candidate that provides the least increase in + // Excess pressure, we can immediately schedule from that Q. + // + // RegionCriticalPSets summarizes the pressure within the scheduled region and + // affects picking from either Q. If scheduling in one direction must + // increase pressure for one of the excess PSets, then schedule in that + // direction first to provide more freedom in the other direction. 
+ if (BotResult == SingleExcess || BotResult == SingleCritical) { + IsTopNode = false; + return BotCand.SU; + } + // Check if the top Q has a better candidate. + SchedCandidate TopCand; + CandResult TopResult = pickNodeFromQueue(Top.Available, + DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + + if (TopResult == SingleExcess || TopResult == SingleCritical) { + IsTopNode = true; + return TopCand.SU; + } + // If either Q has a single candidate that minimizes pressure above the + // original region's pressure pick it. + if (BotResult == SingleMax) { + IsTopNode = false; + return BotCand.SU; + } + if (TopResult == SingleMax) { + IsTopNode = true; + return TopCand.SU; + } + if (TopCand.SCost > BotCand.SCost) { + IsTopNode = true; + return TopCand.SU; + } + // Otherwise prefer the bottom candidate in node order. + IsTopNode = false; + return BotCand.SU; +} + +/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. +SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && + Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); + return NULL; + } + SUnit *SU; + if (llvm::ForceTopDown) { + SU = Top.pickOnlyChoice(); + if (!SU) { + SchedCandidate TopCand; + CandResult TopResult = + pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + (void)TopResult; + SU = TopCand.SU; + } + IsTopNode = true; + } else if (llvm::ForceBottomUp) { + SU = Bot.pickOnlyChoice(); + if (!SU) { + SchedCandidate BotCand; + CandResult BotResult = + pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + (void)BotResult; + SU = BotCand.SU; + } + IsTopNode = false; + } else { + SU = pickNodeBidrectional(IsTopNode); + } + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + + DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling Instruction in cycle " + << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n'; + SU->dump(DAG)); + return SU; +} + +/// Update the scheduler's state after scheduling a node. This is the same node +/// that was just returned by pickNode(). However, VLIWMachineScheduler needs +/// to update it's state based on the current cycle before MachineSchedStrategy +/// does. +void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { + if (IsTopNode) { + SU->TopReadyCycle = Top.CurrCycle; + Top.bumpNode(SU); + } else { + SU->BotReadyCycle = Bot.CurrCycle; + Bot.bumpNode(SU); + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h new file mode 100644 index 000000000000..8ac333fa7db3 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -0,0 +1,243 @@ +//===-- HexagonMachineScheduler.h - Custom Hexagon MI scheduler. ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Custom Hexagon MI scheduler. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMACHINESCHEDULER_H
+#define HEXAGONMACHINESCHEDULER_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+//===----------------------------------------------------------------------===//
+// ConvergingVLIWScheduler - Implementation of the standard
+// MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+class VLIWResourceModel {
+  /// ResourcesModel - Represents VLIW state.
+  /// Not limited to VLIW targets per se, but assumes
+  /// definition of a DFA by the target.
+  DFAPacketizer *ResourcesModel;
+
+  const TargetSchedModel *SchedModel;
+
+  /// Local packet/bundle model. Purely
+  /// internal to the MI scheduler at this time.
+  std::vector<SUnit*> Packet;
+
+  /// Total packets created.
+  unsigned TotalPackets;
+
+public:
+VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) :
+    SchedModel(SM), TotalPackets(0) {
+  ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM,NULL);
+
+  // This hard requirement could be relaxed,
+  // but for now do not let it proceed.
+  assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+  Packet.resize(SchedModel->getIssueWidth());
+  Packet.clear();
+  ResourcesModel->clearResources();
+}
+
+  ~VLIWResourceModel() {
+    delete ResourcesModel;
+  }
+
+  void resetPacketState() {
+    Packet.clear();
+  }
+
+  void resetDFA() {
+    ResourcesModel->clearResources();
+  }
+
+  void reset() {
+    Packet.clear();
+    ResourcesModel->clearResources();
+  }
+
+  bool isResourceAvailable(SUnit *SU);
+  bool reserveResources(SUnit *SU);
+  unsigned getTotalPackets() const { return TotalPackets; }
+};
+
+/// Extend the standard ScheduleDAGMI to provide more context and override the
+/// top-level schedule() driver.
+class VLIWMachineScheduler : public ScheduleDAGMI {
+public:
+  VLIWMachineScheduler(MachineSchedContext *C, MachineSchedStrategy *S):
+    ScheduleDAGMI(C, S) {}
+
+  /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
+  /// time to do some work.
+  virtual void schedule();
+  /// Perform platform specific DAG postprocessing.
+  void postprocessDAG();
+};
+
+/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics
+/// to balance the schedule.
+class ConvergingVLIWScheduler : public MachineSchedStrategy {
+
+  /// Store the state used by ConvergingVLIWScheduler heuristics, required
+  /// for the lifetime of one invocation of pickNode().
+  struct SchedCandidate {
+    // The best SUnit candidate.
+    SUnit *SU;
+
+    // Register pressure values for the best candidate.
+    RegPressureDelta RPDelta;
+
+    // Best scheduling cost.
+    int SCost;
+
+    SchedCandidate(): SU(NULL), SCost(0) {}
+  };
+  /// Represent the type of SchedCandidate found within a single queue.
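  /// Editorial note (hedged, inferred from the .cpp above): the
  /// pickNodeFromQueue implementation only ever produces NoCand,
  /// NodeOrder, or BestCost; the Single*/MultiPressure values look
  /// carried over from the generic converging scheduler, so the
  /// pressure-based early exits in pickNodeBidrectional are effectively
  /// dead at this point.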
+ enum CandResult { + NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure, + BestCost}; + + /// Each Scheduling boundary is associated with ready queues. It tracks the + /// current cycle in whichever direction at has moved, and maintains the state + /// of "hazards" and other interlocks at the current cycle. + struct SchedBoundary { + VLIWMachineScheduler *DAG; + const TargetSchedModel *SchedModel; + + ReadyQueue Available; + ReadyQueue Pending; + bool CheckPending; + + ScheduleHazardRecognizer *HazardRec; + VLIWResourceModel *ResourceModel; + + unsigned CurrCycle; + unsigned IssueCount; + + /// MinReadyCycle - Cycle of the soonest available instruction. + unsigned MinReadyCycle; + + // Remember the greatest min operand latency. + unsigned MaxMinLatency; + + /// Pending queues extend the ready queues with the same ID and the + /// PendingFlag set. + SchedBoundary(unsigned ID, const Twine &Name): + DAG(0), SchedModel(0), Available(ID, Name+".A"), + Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"), + CheckPending(false), HazardRec(0), ResourceModel(0), + CurrCycle(0), IssueCount(0), + MinReadyCycle(UINT_MAX), MaxMinLatency(0) {} + + ~SchedBoundary() { + delete ResourceModel; + delete HazardRec; + } + + void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) { + DAG = dag; + SchedModel = smodel; + } + + bool isTop() const { + return Available.getID() == ConvergingVLIWScheduler::TopQID; + } + + bool checkHazard(SUnit *SU); + + void releaseNode(SUnit *SU, unsigned ReadyCycle); + + void bumpCycle(); + + void bumpNode(SUnit *SU); + + void releasePending(); + + void removeReady(SUnit *SU); + + SUnit *pickOnlyChoice(); + }; + + VLIWMachineScheduler *DAG; + const TargetSchedModel *SchedModel; + + // State of the top and bottom scheduled instruction boundaries. + SchedBoundary Top; + SchedBoundary Bot; + +public: + /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) + enum { + TopQID = 1, + BotQID = 2, + LogMaxQID = 2 + }; + + ConvergingVLIWScheduler(): + DAG(0), SchedModel(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + + virtual void initialize(ScheduleDAGMI *dag); + + virtual SUnit *pickNode(bool &IsTopNode); + + virtual void schedNode(SUnit *SU, bool IsTopNode); + + virtual void releaseTopNode(SUnit *SU); + + virtual void releaseBottomNode(SUnit *SU); + + unsigned ReportPackets() { + return Top.ResourceModel->getTotalPackets() + + Bot.ResourceModel->getTotalPackets(); + } + +protected: + SUnit *pickNodeBidrectional(bool &IsTopNode); + + int SchedulingCost(ReadyQueue &Q, + SUnit *SU, SchedCandidate &Candidate, + RegPressureDelta &Delta, bool verbose); + + CandResult pickNodeFromQueue(ReadyQueue &Q, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate); +#ifndef NDEBUG + void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, + PressureChange P = PressureChange()); +#endif +}; + +} // namespace + + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp new file mode 100644 index 000000000000..f7c45132138e --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -0,0 +1,657 @@ +//===----- HexagonNewValueJump.cpp - Hexagon Backend New Value Jump -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the NewValueJump pass in Hexagon.
+// Ideally, we should merge this as a Peephole pass prior to register
+// allocation, but because we have a spill in between the feeder and new value
+// jump instructions, we are forced to run it after register allocation.
+// Having said that, we should re-attempt to pull this earlier at some point
+// in the future.
+//
+// The basic approach looks for a sequence of a predicated jump, the compare
+// instruction that generates the predicate, and the feeder to the predicate.
+// Once it finds all of them, it collapses the compare and jump instructions
+// into a new-value jump instruction.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "hexagon-nvj"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+
+#include <map>
+
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created");
+
+static cl::opt<int>
+DbgNVJCount("nvj-count", cl::init(-1), cl::Hidden, cl::desc(
+  "Maximum number of predicated jumps to be converted to New Value Jump"));
+
+static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden,
+    cl::ZeroOrMore, cl::init(false),
+    cl::desc("Disable New Value Jumps"));
+
+namespace llvm {
+  void initializeHexagonNewValueJumpPass(PassRegistry&);
+}
+
+
+namespace {
+  struct HexagonNewValueJump : public MachineFunctionPass {
+    const HexagonInstrInfo *QII;
+    const HexagonRegisterInfo *QRI;
+
+  public:
+    static char ID;
+
+    HexagonNewValueJump() : MachineFunctionPass(ID) {
+      initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineBranchProbabilityInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    const char *getPassName() const {
+      return "Hexagon NewValueJump";
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+  private:
+    /// \brief A handle to the branch probability pass.
+    const MachineBranchProbabilityInfo *MBPI;
+
+  };
+
+} // end of anonymous namespace
+
+char HexagonNewValueJump::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonNewValueJump, "hexagon-nvj",
+                      "Hexagon NewValueJump", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(HexagonNewValueJump, "hexagon-nvj",
+                    "Hexagon NewValueJump", false, false)
+
+
+// We have identified that this II could be a feeder to an NVJ;
+// verify that it can be.
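// Editorial illustration (hedged; Hexagon assembly sketched from the pass
// description above):
//
//     r7 = memw(r2+#0)             // feeder
//     p0 = cmp.eq(r7, #0)          // compare producing the predicate
//     if (p0.new) jump:t .LBB0_2   // predicated jump
//
// collapses into a single packet with a new-value jump that reads the
// freshly produced r7 directly:
//
//     { r7 = memw(r2+#0)
//       if (cmp.eq(r7.new, #0)) jump:t .LBB0_2 }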
+// We have identified that this II could be a feeder to an NVJ;
+// verify that it can be one.
+static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
+                                      const TargetRegisterInfo *TRI,
+                                      MachineBasicBlock::iterator II,
+                                      MachineBasicBlock::iterator end,
+                                      MachineBasicBlock::iterator skip,
+                                      MachineFunction &MF) {
+
+  // A predicated instruction cannot be a feeder to an NVJ.
+  if (QII->isPredicated(II))
+    return false;
+
+  // Bail out if feederReg is a paired register (double regs in
+  // our case). One would think that we could check whether a given
+  // register cmpReg1 or cmpReg2 is a sub-register of feederReg
+  // using -- if (QRI->isSubRegister(feederReg, cmpReg1)) -- logic
+  // before the call site of this function.
+  // But we cannot, as the code comes in the following fashion:
+  //     %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill>
+  //     %R0<def> = KILL %R0, %D0<imp-use,kill>
+  //     %P0<def> = CMPEQri %R0<kill>, 0
+  // Hence, we need to check whether it's a KILL instruction.
+  if (II->getOpcode() == TargetOpcode::KILL)
+    return false;
+
+
+  // Make sure there is no 'def' or 'use' of any of the uses of the
+  // feeder insn between its definition (this MI) and the jump (jmpInst),
+  // skipping the compare (cmpInst).
+  // Here's an example:
+  //     r21=memub(r22+r24<<#0)
+  //     p0 = cmp.eq(r21, #0)
+  //     r4=memub(r3+r21<<#0)
+  //     if (p0.new) jump:t .LBB29_45
+  // Without this check, it would be converted into
+  //     r4=memub(r3+r21<<#0)
+  //     r21=memub(r22+r24<<#0)
+  //     p0 = cmp.eq(r21, #0)
+  //     if (p0.new) jump:t .LBB29_45
+  // which would result in WAR hazards if converted to a New Value Jump.
+
+  for (unsigned i = 0; i < II->getNumOperands(); ++i) {
+    if (II->getOperand(i).isReg() &&
+        (II->getOperand(i).isUse() || II->getOperand(i).isDef())) {
+      MachineBasicBlock::iterator localII = II;
+      ++localII;
+      unsigned Reg = II->getOperand(i).getReg();
+      for (MachineBasicBlock::iterator localBegin = localII;
+           localBegin != end; ++localBegin) {
+        if (localBegin == skip) continue;
+        // Check for subregisters too.
+        if (localBegin->modifiesRegister(Reg, TRI) ||
+            localBegin->readsRegister(Reg, TRI))
+          return false;
+      }
+    }
+  }
+  return true;
+}
+
+// These are the common checks that need to be performed
+// to determine whether
+// 1. the compare instruction can be moved before the jump.
+// 2. the feeder to the compare instruction can be moved before the jump.
+static bool commonChecksToProhibitNewValueJump(bool afterRA,
+                                       MachineBasicBlock::iterator MII) {
+
+  // If there is a store in the path, bail out.
+  if (MII->getDesc().mayStore())
+    return false;
+
+  // If there is a call in the path, bail out.
+  if (MII->getOpcode() == Hexagon::CALLv3)
+    return false;
+
+  // If NVJ is running prior to RA, do the following checks.
+  if (!afterRA) {
+    // The following Target Opcode instructions are spurious
+    // to new value jumps. If they are in the path, bail out.
+    // KILL sets the kill flag on the opcode. It also sets up a
+    // single register, out of a pair:
+    //     %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill>
+    //     %R0<def> = KILL %R0, %D0<imp-use,kill>
+    //     %P0<def> = CMPEQri %R0<kill>, 0
+    // PHI can be anything after RA.
+    // COPY can rematerialize things in between feeder, compare and nvj.
+    if (MII->getOpcode() == TargetOpcode::KILL ||
+        MII->getOpcode() == TargetOpcode::PHI  ||
+        MII->getOpcode() == TargetOpcode::COPY)
+      return false;
+
+    // The following Hexagon pseudo instructions have their "use" and "def"
+    // of registers set by individual passes in the backend. At this time,
+    // we don't know the scope of usage and definitions of these
+    // instructions.
+    if (MII->getOpcode() == Hexagon::TFR_condset_rr ||
+        MII->getOpcode() == Hexagon::TFR_condset_ii ||
+        MII->getOpcode() == Hexagon::TFR_condset_ri ||
+        MII->getOpcode() == Hexagon::TFR_condset_ir ||
+        MII->getOpcode() == Hexagon::LDriw_pred     ||
+        MII->getOpcode() == Hexagon::STriw_pred)
+      return false;
+  }
+
+  return true;
+}
+
+static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
+                                     const TargetRegisterInfo *TRI,
+                                     MachineBasicBlock::iterator II,
+                                     unsigned pReg,
+                                     bool secondReg,
+                                     bool optLocation,
+                                     MachineBasicBlock::iterator end,
+                                     MachineFunction &MF) {
+
+  MachineInstr *MI = II;
+
+  // If the second operand of the compare is an imm, make sure it's in the
+  // range specified by the arch.
+  if (!secondReg) {
+    int64_t v = MI->getOperand(2).getImm();
+
+    if (!(isUInt<5>(v) ||
+         ((MI->getOpcode() == Hexagon::CMPEQri ||
+           MI->getOpcode() == Hexagon::CMPGTri) &&
+          (v == -1))))
+      return false;
+  }
+
+  unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences a compiler warning.
+  cmpReg1 = MI->getOperand(1).getReg();
+
+  if (secondReg) {
+    cmpOp2 = MI->getOperand(2).getReg();
+
+    // Make sure that the second register is not defined by a COPY.
+    // At the machine-code level we don't need this, but if we decide
+    // to move new value jumps prior to RA, we will need it.
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    if (!TargetRegisterInfo::isPhysicalRegister(cmpOp2)) {
+      MachineInstr *def = MRI.getVRegDef(cmpOp2);
+      if (def->getOpcode() == TargetOpcode::COPY)
+        return false;
+    }
+  }
+
+  // Walk the instructions after the compare (predicate def) up to the jump,
+  // and make sure the following conditions hold.
+  ++II;
+  for (MachineBasicBlock::iterator localII = II; localII != end;
+       ++localII) {
+
+    // Check 1.
+    // If the "common" checks fail, bail out.
+    if (!commonChecksToProhibitNewValueJump(optLocation, localII))
+      return false;
+
+    // Check 2.
+    // If there is a def or use of the predicate (the result of the compare),
+    // bail out.
+    if (localII->modifiesRegister(pReg, TRI) ||
+        localII->readsRegister(pReg, TRI))
+      return false;
+
+    // Check 3.
+    // If there is a def of any of the uses of the compare (the operands of
+    // the compare), bail out.
+    // E.g.
+    //     p0 = cmp.eq(r2, r0)
+    //     r2 = r4
+    //     if (p0.new) jump:t .LBB28_3
+    if (localII->modifiesRegister(cmpReg1, TRI) ||
+        (secondReg && localII->modifiesRegister(cmpOp2, TRI)))
+      return false;
+  }
+  return true;
+}
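+
+// Illustrative note (not part of the original source): the _t / _nt suffix
+// pair in each opcode returned by getNewValueJumpOpcode below encodes the
+// static taken / not-taken hint derived from MachineBranchProbabilityInfo.
+// For example, with an edge probability of at least 1/2, a CMPEQrr feeding
+// the jump becomes (register numbers and label hypothetical):
+//
+//     if (cmp.eq(r1.new, r2)) jump:t .LBB0_1
+//
+// and the :nt (not-taken) form of the opcode is selected otherwise.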
+
+// Given a compare operator, return a matching New Value Jump
+// compare operator. Make sure that MI here is included in
+// HexagonInstrInfo.cpp::isNewValueJumpCandidate.
+static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
+                                      bool secondRegNewified,
+                                      MachineBasicBlock *jmpTarget,
+                                      const MachineBranchProbabilityInfo
+                                      *MBPI) {
+  bool taken = false;
+  MachineBasicBlock *Src = MI->getParent();
+  const BranchProbability Prediction =
+    MBPI->getEdgeProbability(Src, jmpTarget);
+
+  if (Prediction >= BranchProbability(1,2))
+    taken = true;
+
+  switch (MI->getOpcode()) {
+    case Hexagon::CMPEQrr:
+      return taken ? Hexagon::CMPEQrr_t_Jumpnv_t_V4
+                   : Hexagon::CMPEQrr_t_Jumpnv_nt_V4;
+
+    case Hexagon::CMPEQri: {
+      if (reg >= 0)
+        return taken ? Hexagon::CMPEQri_t_Jumpnv_t_V4
+                     : Hexagon::CMPEQri_t_Jumpnv_nt_V4;
+      else
+        return taken ? Hexagon::CMPEQn1_t_Jumpnv_t_V4
+                     : Hexagon::CMPEQn1_t_Jumpnv_nt_V4;
+    }
+
+    case Hexagon::CMPGTrr: {
+      if (secondRegNewified)
+        return taken ? Hexagon::CMPLTrr_t_Jumpnv_t_V4
+                     : Hexagon::CMPLTrr_t_Jumpnv_nt_V4;
+      else
+        return taken ? Hexagon::CMPGTrr_t_Jumpnv_t_V4
+                     : Hexagon::CMPGTrr_t_Jumpnv_nt_V4;
+    }
+
+    case Hexagon::CMPGTri: {
+      if (reg >= 0)
+        return taken ? Hexagon::CMPGTri_t_Jumpnv_t_V4
+                     : Hexagon::CMPGTri_t_Jumpnv_nt_V4;
+      else
+        return taken ? Hexagon::CMPGTn1_t_Jumpnv_t_V4
+                     : Hexagon::CMPGTn1_t_Jumpnv_nt_V4;
+    }
+
+    case Hexagon::CMPGTUrr: {
+      if (secondRegNewified)
+        return taken ? Hexagon::CMPLTUrr_t_Jumpnv_t_V4
+                     : Hexagon::CMPLTUrr_t_Jumpnv_nt_V4;
+      else
+        return taken ? Hexagon::CMPGTUrr_t_Jumpnv_t_V4
+                     : Hexagon::CMPGTUrr_t_Jumpnv_nt_V4;
+    }
+
+    case Hexagon::CMPGTUri:
+      return taken ? Hexagon::CMPGTUri_t_Jumpnv_t_V4
+                   : Hexagon::CMPGTUri_t_Jumpnv_nt_V4;
+
+    default:
+      llvm_unreachable("Could not find matching New Value Jump instruction.");
+  }
+  // Return *some value* to avoid a compiler warning.
+  return 0;
+}
+
+bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
+
+  DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
+               << "********** Function: "
+               << MF.getName() << "\n");
+
+#if 0
+  // For now disable this; if we move NewValueJump before register
+  // allocation we will need this information.
+  LiveVariables &LVs = getAnalysis<LiveVariables>();
+#endif
+
+  QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
+  QRI =
+    static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+
+  if (!QRI->Subtarget.hasV4TOps() ||
+      DisableNewValueJumps) {
+    return false;
+  }
+
+  int nvjCount = DbgNVJCount;
+  int nvjGenerated = 0;
+
+  // Loop through all the basic blocks of the function.
+  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+       MBBb != MBBe; ++MBBb) {
+    MachineBasicBlock* MBB = MBBb;
+
+    DEBUG(dbgs() << "** dumping bb ** "
+                 << MBB->getNumber() << "\n");
+    DEBUG(MBB->dump());
+    DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");
+    bool foundJump    = false;
+    bool foundCompare = false;
+    bool invertPredicate = false;
+    unsigned predReg = 0; // predicate reg of the jump.
+    unsigned cmpReg1 = 0;
+    int cmpOp2 = 0;
+    bool MO1IsKill = false;
+    bool MO2IsKill = false;
+    MachineBasicBlock::iterator jmpPos;
+    MachineBasicBlock::iterator cmpPos;
+    MachineInstr *cmpInstr = NULL, *jmpInstr = NULL;
+    MachineBasicBlock *jmpTarget = NULL;
+    bool afterRA = false;
+    bool isSecondOpReg = false;
+    bool isSecondOpNewified = false;
+    // Traverse the basic block - bottom up.
+    for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
+         MII != E;) {
+      MachineInstr *MI = --MII;
+      if (MI->isDebugValue()) {
+        continue;
+      }
+
+      if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
+        break;
+
+      DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");
+
+      if (!foundJump &&
+         (MI->getOpcode() == Hexagon::JMP_t ||
+          MI->getOpcode() == Hexagon::JMP_f ||
+          MI->getOpcode() == Hexagon::JMP_tnew_t ||
+          MI->getOpcode() == Hexagon::JMP_tnew_nt ||
+          MI->getOpcode() == Hexagon::JMP_fnew_t ||
+          MI->getOpcode() == Hexagon::JMP_fnew_nt)) {
+        // This is where we would insert the compare and the instruction
+        // that feeds the compare.
+        jmpPos = MII;
+        jmpInstr = MI;
+        predReg = MI->getOperand(0).getReg();
+        afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);
+
+        // If the if-converter had not messed up the kill flags of the
+        // operands, the following check on the kill flag would suffice.
+        // if(!jmpInstr->getOperand(0).isKill()) break;
+
+        // This predicate register is live out of the BB.
+        // This would only work if we could actually use live variable
+        // analysis on physical registers - but LLVM does not
+        // provide LV analysis on phys regs.
+        //if(LVs.isLiveOut(predReg, *MBB)) break;
+
+        // Get all the successors of this block - there will always
+        // be two. Check whether the predicate register is live-in in those
+        // successors. If yes, we cannot delete the predicate -
+        // we do this only because LLVM does not provide live-out
+        // information at the BB level.
+        bool predLive = false;
+        for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+             SIE = MBB->succ_end(); SI != SIE; ++SI) {
+          MachineBasicBlock* succMBB = *SI;
+          if (succMBB->isLiveIn(predReg)) {
+            predLive = true;
+          }
+        }
+        if (predLive)
+          break;
+
+        jmpTarget = MI->getOperand(1).getMBB();
+        foundJump = true;
+        if (MI->getOpcode() == Hexagon::JMP_f ||
+            MI->getOpcode() == Hexagon::JMP_fnew_t ||
+            MI->getOpcode() == Hexagon::JMP_fnew_nt) {
+          invertPredicate = true;
+        }
+        continue;
+      }
+
+      // No new value jump if there is a barrier. A barrier has to be in its
+      // own packet. A barrier has zero operands. We conservatively bail out
+      // here if we see any instruction with zero operands.
+      if (foundJump && MI->getNumOperands() == 0)
+        break;
+
+      if (foundJump &&
+          !foundCompare &&
+          MI->getOperand(0).isReg() &&
+          MI->getOperand(0).getReg() == predReg) {
+
+        // Not all compares can be new value compares. Arch Spec: 7.6.1.1.
+        if (QII->isNewValueJumpCandidate(MI)) {
+
+          assert((MI->getDesc().isCompare()) &&
+                 "Only a compare instruction can be collapsed into a "
+                 "New Value Jump");
+          isSecondOpReg = MI->getOperand(2).isReg();
+
+          if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg,
+                                        afterRA, jmpPos, MF))
+            break;
+
+          cmpInstr = MI;
+          cmpPos = MII;
+          foundCompare = true;
+
+          // We need cmpReg1 and cmpOp2 (imm or reg) while building
+          // the new value jump instruction.
+          cmpReg1 = MI->getOperand(1).getReg();
+          if (MI->getOperand(1).isKill())
+            MO1IsKill = true;
+
+          if (isSecondOpReg) {
+            cmpOp2 = MI->getOperand(2).getReg();
+            if (MI->getOperand(2).isKill())
+              MO2IsKill = true;
+          } else
+            cmpOp2 = MI->getOperand(2).getImm();
+          continue;
+        }
+      }
+
+      if (foundCompare && foundJump) {
+
+        // If the "common" checks fail, bail out on this BB.
+        if (!commonChecksToProhibitNewValueJump(afterRA, MII))
+          break;
+
+        bool foundFeeder = false;
+        MachineBasicBlock::iterator feederPos = MII;
+        if (MI->getOperand(0).isReg() &&
+            MI->getOperand(0).isDef() &&
+           (MI->getOperand(0).getReg() == cmpReg1 ||
+            (isSecondOpReg &&
+             MI->getOperand(0).getReg() == (unsigned) cmpOp2))) {
+
+          unsigned feederReg = MI->getOperand(0).getReg();
+
+          // First try to see if we can get the feeder from the first operand
+          // of the compare. If we cannot, and if secondOpReg is true (the
+          // second operand of the compare is also a register), try that one.
+          // TODO: Try to come up with some heuristic to figure out which
+          // feeder would benefit.
+
+          if (feederReg == cmpReg1) {
+            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) {
+              if (!isSecondOpReg)
+                break;
+              else
+                continue;
+            } else
+              foundFeeder = true;
+          }
+
+          if (!foundFeeder &&
+              isSecondOpReg &&
+              feederReg == (unsigned) cmpOp2)
+            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF))
+              break;
+
+          if (isSecondOpReg) {
+            // In the case of CMPLT, CMPLTU, or CMPEQ with the second register
+            // to newify, swap the operands.
+            if (cmpInstr->getOpcode() == Hexagon::CMPEQrr &&
+                feederReg == (unsigned) cmpOp2) {
+              unsigned tmp = cmpReg1;
+              bool tmpIsKill = MO1IsKill;
+              cmpReg1 = cmpOp2;
+              MO1IsKill = MO2IsKill;
+              cmpOp2 = tmp;
+              MO2IsKill = tmpIsKill;
+            }
+
+            // Now that we have swapped the operands, all we need to check is
+            // whether the second operand (after the swap) is the feeder.
+            // If it is, make a note of it.
+            if (feederReg == (unsigned)cmpOp2)
+              isSecondOpNewified = true;
+          }
+
+          // Now that we are moving the feeder closer to the jump,
+          // make sure we are respecting the kill values of
+          // the operands of the feeder.
+
+          bool updatedIsKill = false;
+          for (unsigned i = 0; i < MI->getNumOperands(); i++) {
+            MachineOperand &MO = MI->getOperand(i);
+            if (MO.isReg() && MO.isUse()) {
+              unsigned feederReg = MO.getReg();
+              for (MachineBasicBlock::iterator localII = feederPos,
+                   end = jmpPos; localII != end; localII++) {
+                MachineInstr *localMI = localII;
+                for (unsigned j = 0; j < localMI->getNumOperands(); j++) {
+                  MachineOperand &localMO = localMI->getOperand(j);
+                  if (localMO.isReg() && localMO.isUse() &&
+                      localMO.isKill() && feederReg == localMO.getReg()) {
+                    // We found a kill of a use register; transfer the kill
+                    // flag onto the feeder's operand.
+                    localMO.setIsKill(false);
+                    MO.setIsKill();
+                    updatedIsKill = true;
+                    break;
+                  }
+                }
+                if (updatedIsKill) break;
+              }
+            }
+            if (updatedIsKill) break;
+          }
+
+          MBB->splice(jmpPos, MI->getParent(), MI);
+          MBB->splice(jmpPos, MI->getParent(), cmpInstr);
+          DebugLoc dl = MI->getDebugLoc();
+          MachineInstr *NewMI;
+
+          assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
+                 "This compare is not a New Value Jump candidate.");
+          unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
+                                               isSecondOpNewified,
+                                               jmpTarget, MBPI);
+          if (invertPredicate)
+            opc = QII->getInvertedPredicatedOpcode(opc);
+
+          if (isSecondOpReg)
+            NewMI = BuildMI(*MBB, jmpPos, dl,
+                            QII->get(opc))
+                      .addReg(cmpReg1, getKillRegState(MO1IsKill))
+                      .addReg(cmpOp2, getKillRegState(MO2IsKill))
+                      .addMBB(jmpTarget);
+
+          else if ((cmpInstr->getOpcode() == Hexagon::CMPEQri ||
+                    cmpInstr->getOpcode() == Hexagon::CMPGTri) &&
+                    cmpOp2 == -1)
+            // Corresponding new-value compare jump instructions don't have the
+            // operand for the -1 immediate value.
+            NewMI = BuildMI(*MBB, jmpPos, dl,
+                            QII->get(opc))
+                      .addReg(cmpReg1, getKillRegState(MO1IsKill))
+                      .addMBB(jmpTarget);
+
+          else
+            NewMI = BuildMI(*MBB, jmpPos, dl,
+                            QII->get(opc))
+                      .addReg(cmpReg1, getKillRegState(MO1IsKill))
+                      .addImm(cmpOp2)
+                      .addMBB(jmpTarget);
+
+          assert(NewMI && "New Value Jump Instruction Not created!");
+          (void)NewMI;
+          if (cmpInstr->getOperand(0).isReg() &&
+              cmpInstr->getOperand(0).isKill())
+            cmpInstr->getOperand(0).setIsKill(false);
+          if (cmpInstr->getOperand(1).isReg() &&
+              cmpInstr->getOperand(1).isKill())
+            cmpInstr->getOperand(1).setIsKill(false);
+          cmpInstr->eraseFromParent();
+          jmpInstr->eraseFromParent();
+          ++nvjGenerated;
+          ++NumNVJGenerated;
+          break;
+        }
+      }
+    }
+  }
+
+  return true;
+
+}
+
+FunctionPass *llvm::createHexagonNewValueJump() {
+  return new HexagonNewValueJump();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
new file mode 100644
index 000000000000..c79d78f21080
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
@@ -0,0 +1,858 @@
+//===- HexagonOperands.td - Hexagon immediate processing -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Immediate operands.
+
+let PrintMethod = "printImmOperand" in {
+  // f32Ext type is used to identify constant extended floating point
+  // immediates.
+  def f32Ext : Operand<f32>;
+  def s32Imm : Operand<i32>;
+  def s26_6Imm : Operand<i32>;
+  def s16Imm : Operand<i32>;
+  def s12Imm : Operand<i32>;
+  def s11Imm : Operand<i32>;
+  def s11_0Imm : Operand<i32>;
+  def s11_1Imm : Operand<i32>;
+  def s11_2Imm : Operand<i32>;
+  def s11_3Imm : Operand<i32>;
+  def s10Imm : Operand<i32>;
+  def s9Imm : Operand<i32>;
+  def m9Imm : Operand<i32>;
+  def s8Imm : Operand<i32>;
+  def s8Imm64 : Operand<i64>;
+  def s6Imm : Operand<i32>;
+  def s4Imm : Operand<i32>;
+  def s4_0Imm : Operand<i32>;
+  def s4_1Imm : Operand<i32>;
+  def s4_2Imm : Operand<i32>;
+  def s4_3Imm : Operand<i32>;
+  def u64Imm : Operand<i64>;
+  def u32Imm : Operand<i32>;
+  def u26_6Imm : Operand<i32>;
+  def u16Imm : Operand<i32>;
+  def u16_0Imm : Operand<i32>;
+  def u16_1Imm : Operand<i32>;
+  def u16_2Imm : Operand<i32>;
+  def u11_3Imm : Operand<i32>;
+  def u10Imm : Operand<i32>;
+  def u9Imm : Operand<i32>;
+  def u8Imm : Operand<i32>;
+  def u7Imm : Operand<i32>;
+  def u6Imm : Operand<i32>;
+  def u6_0Imm : Operand<i32>;
+  def u6_1Imm : Operand<i32>;
+  def u6_2Imm : Operand<i32>;
+  def u6_3Imm : Operand<i32>;
+  def u5Imm : Operand<i32>;
+  def u4Imm : Operand<i32>;
+  def u3Imm : Operand<i32>;
+  def u2Imm : Operand<i32>;
+  def u1Imm : Operand<i32>;
+  def n8Imm : Operand<i32>;
+  def m6Imm : Operand<i32>;
+}
+
+let PrintMethod = "printNOneImmOperand" in
+def nOneImm : Operand<i32>;
+
+//
+// Immediate predicates
+//
+def s32ImmPred : PatLeaf<(i32 imm), [{
+  // s32ImmPred predicate - True if the immediate fits in a 32-bit sign
+  // extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<32>(v);
+}]>;
+
+def s32_24ImmPred : PatLeaf<(i32 imm), [{
+  // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
+  // extended field that is a multiple of 0x1000000.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<32,24>(v);
+}]>;
+
+def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
+  // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
+  // extended field that is a multiple of 0x10000.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<24,16>(v);
+}]>;
+
+def s26_6ImmPred : PatLeaf<(i32 imm), [{
+  // s26_6ImmPred predicate - True if the immediate fits in a 32-bit
+  // sign extended field and is a multiple of 64.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<26,6>(v);
+}]>;
+
+
+def s16ImmPred : PatLeaf<(i32 imm), [{
+  // s16ImmPred predicate - True if the immediate fits in a 16-bit sign
+  // extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<16>(v);
+}]>;
+
+
+def s13ImmPred : PatLeaf<(i32 imm), [{
+  // s13ImmPred predicate - True if the immediate fits in a 13-bit sign
+  // extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<13>(v);
+}]>;
+
+
+def s12ImmPred : PatLeaf<(i32 imm), [{
+  // s12ImmPred predicate - True if the immediate fits in a 12-bit
+  // sign extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<12>(v);
+}]>;
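+
+// Worked example (added for illustration; not in the original source):
+// isShiftedInt<N,S>(v) holds when v is a multiple of 2^S whose shifted value
+// v >> S fits in an N-bit signed field, i.e. v itself spans N+S bits. So
+// isShiftedInt<11,1>(v) is equivalent to isInt<12>(v) && (v % 2 == 0):
+// v = 1022 passes (12-bit and even), v = 1023 fails (odd), and v = 4096
+// fails (needs 13 bits). The shifted predicates below all follow this shape.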
+def s11_0ImmPred : PatLeaf<(i32 imm), [{
+  // s11_0ImmPred predicate - True if the immediate fits in an 11-bit
+  // sign extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<11>(v);
+}]>;
+
+
+def s11_1ImmPred : PatLeaf<(i32 imm), [{
+  // s11_1ImmPred predicate - True if the immediate fits in a 12-bit
+  // sign extended field and is a multiple of 2.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<11,1>(v);
+}]>;
+
+
+def s11_2ImmPred : PatLeaf<(i32 imm), [{
+  // s11_2ImmPred predicate - True if the immediate fits in a 13-bit
+  // sign extended field and is a multiple of 4.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<11,2>(v);
+}]>;
+
+
+def s11_3ImmPred : PatLeaf<(i32 imm), [{
+  // s11_3ImmPred predicate - True if the immediate fits in a 14-bit
+  // sign extended field and is a multiple of 8.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<11,3>(v);
+}]>;
+
+
+def s10ImmPred : PatLeaf<(i32 imm), [{
+  // s10ImmPred predicate - True if the immediate fits in a 10-bit sign
+  // extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<10>(v);
+}]>;
+
+
+def s9ImmPred : PatLeaf<(i32 imm), [{
+  // s9ImmPred predicate - True if the immediate fits in a 9-bit sign
+  // extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<9>(v);
+}]>;
+
+def m9ImmPred : PatLeaf<(i32 imm), [{
+  // m9ImmPred predicate - True if the immediate fits in a 9-bit magnitude
+  // field. The range of m9 is -255 to 255.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<9>(v) && (v != -256);
+}]>;
+
+def s8ImmPred : PatLeaf<(i32 imm), [{
+  // s8ImmPred predicate - True if the immediate fits in an 8-bit sign
+  // extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<8>(v);
+}]>;
+
+
+def s8Imm64Pred : PatLeaf<(i64 imm), [{
+  // s8Imm64Pred predicate - True if the immediate fits in an 8-bit sign
+  // extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<8>(v);
+}]>;
+
+
+def s6ImmPred : PatLeaf<(i32 imm), [{
+  // s6ImmPred predicate - True if the immediate fits in a 6-bit sign
+  // extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<6>(v);
+}]>;
+
+
+def s4_0ImmPred : PatLeaf<(i32 imm), [{
+  // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign
+  // extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<4>(v);
+}]>;
+
+
+def s4_1ImmPred : PatLeaf<(i32 imm), [{
+  // s4_1ImmPred predicate - True if the immediate fits in a 5-bit sign
+  // extended field and is a multiple of 2.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<4,1>(v);
+}]>;
+
+
+def s4_2ImmPred : PatLeaf<(i32 imm), [{
+  // s4_2ImmPred predicate - True if the immediate fits in a 6-bit sign
+  // extended field and is a multiple of 4.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<4,2>(v);
+}]>;
+
+
+def s4_3ImmPred : PatLeaf<(i32 imm), [{
+  // s4_3ImmPred predicate - True if the immediate fits in a 7-bit sign
+  // extended field and is a multiple of 8.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<4,3>(v);
+}]>;
+
+
+def u64ImmPred : PatLeaf<(i64 imm), [{
+  // Adding "N ||" to suppress gcc unused warning.
+  return (N || true);
+}]>;
+
+def u32ImmPred : PatLeaf<(i32 imm), [{
+  // u32ImmPred predicate - True if the immediate fits in a 32-bit field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<32>(v);
+}]>;
+
+def u26_6ImmPred : PatLeaf<(i32 imm), [{
+  // u26_6ImmPred - True if the immediate fits in a 32-bit field and
+  // is a multiple of 64.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<26,6>(v);
+}]>;
+
+def u16ImmPred : PatLeaf<(i32 imm), [{
+  // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<16>(v);
+}]>;
+
+def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+  // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit sign
+  // extended s8 field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<16,8>(v);
+}]>;
+
+def u9ImmPred : PatLeaf<(i32 imm), [{
+  // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<9>(v);
+}]>;
+
+
+def u8ImmPred : PatLeaf<(i32 imm), [{
+  // u8ImmPred predicate - True if the immediate fits in an 8-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<8>(v);
+}]>;
+
+def u7StrictPosImmPred : ImmLeaf<i32, [{
+  // u7StrictPosImmPred predicate - True if the immediate fits in a 7-bit
+  // unsigned field and is strictly greater than 0.
+  return isUInt<7>(Imm) && Imm > 0;
+}]>;
+
+def u7ImmPred : PatLeaf<(i32 imm), [{
+  // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<7>(v);
+}]>;
+
+
+def u6ImmPred : PatLeaf<(i32 imm), [{
+  // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<6>(v);
+}]>;
+
+def u6_0ImmPred : PatLeaf<(i32 imm), [{
+  // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field. Same as u6ImmPred.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<6>(v);
+}]>;
+
+def u6_1ImmPred : PatLeaf<(i32 imm), [{
+  // u6_1ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+  // field that is 1-bit aligned (a multiple of 2).
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<6,1>(v);
+}]>;
+
+def u6_2ImmPred : PatLeaf<(i32 imm), [{
+  // u6_2ImmPred predicate - True if the immediate fits in an 8-bit unsigned
+  // field that is 2-bit aligned (a multiple of 4).
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<6,2>(v);
+}]>;
+
+def u6_3ImmPred : PatLeaf<(i32 imm), [{
+  // u6_3ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+  // field that is 3-bit aligned (a multiple of 8).
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<6,3>(v);
+}]>;
+
+def u5ImmPred : PatLeaf<(i32 imm), [{
+  // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<5>(v);
+}]>;
+
+
+def u3ImmPred : PatLeaf<(i32 imm), [{
+  // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<3>(v);
+}]>;
+
+
+def u2ImmPred : PatLeaf<(i32 imm), [{
+  // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<2>(v);
+}]>;
+
+
+def u1ImmPred : PatLeaf<(i1 imm), [{
+  // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<1>(v);
+}]>;
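+
+// Note added for illustration (not in the original source): the m5 variants
+// below accept small negatives in the range [-31, -1] whose magnitude fits a
+// 5-bit memop field. The char/short casts matter because, as the comments
+// note, CombineInstr can present such a value zero-extended at a narrower
+// width: e.g. -2 stored in a byte arrives as 254 (0xFE), and
+// (int8_t)254 == -2 recovers the intended value.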
+def m5BImmPred : PatLeaf<(i32 imm), [{
+  // m5BImmPred predicate - True if the (char) number is in the range
+  // -31 .. -1 and will fit in a 5-bit field when made positive, for use
+  // in memops.
+  // This is specific to the zero-extension of a negative by CombineInstr.
+  int8_t v = (int8_t)N->getSExtValue();
+  return (-31 <= v && v <= -1);
+}]>;
+
+def m5HImmPred : PatLeaf<(i32 imm), [{
+  // m5HImmPred predicate - True if the (short) number is in the range
+  // -31 .. -1 and will fit in a 5-bit field when made positive, for use
+  // in memops.
+  // This is specific to the zero-extension of a negative by CombineInstr.
+  int16_t v = (int16_t)N->getSExtValue();
+  return (-31 <= v && v <= -1);
+}]>;
+
+def m5ImmPred : PatLeaf<(i32 imm), [{
+  // m5ImmPred predicate - True if the number is in the range -31 .. -1
+  // and will fit in a 5-bit field when made positive, for use in memops.
+  int64_t v = (int64_t)N->getSExtValue();
+  return (-31 <= v && v <= -1);
+}]>;
+
+// InN means negative integers in [-(2^N - 1), 0].
+def n8ImmPred : PatLeaf<(i32 imm), [{
+  // n8ImmPred predicate - True if the immediate is a negative integer
+  // in the range [-255, 0].
+  int64_t v = (int64_t)N->getSExtValue();
+  return (-255 <= v && v <= 0);
+}]>;
+
+def nOneImmPred : PatLeaf<(i32 imm), [{
+  // nOneImmPred predicate - True if the immediate is -1.
+  int64_t v = (int64_t)N->getSExtValue();
+  return (-1 == v);
+}]>;
+
+def Set5ImmPred : PatLeaf<(i32 imm), [{
+  // Set5ImmPred predicate - True if the number is in the series of values:
+  // [ 2^0, 2^1, ... 2^31 ].
+  // For use in setbit immediate.
+  uint32_t v = (int32_t)N->getSExtValue();
+  // Constrain to 32 bits, and then check for a single bit.
+  return ImmIsSingleBit(v);
+}]>;
+
+def Clr5ImmPred : PatLeaf<(i32 imm), [{
+  // Clr5ImmPred predicate - True if the number is in the series of
+  // bit negated values:
+  // [ 2^0, 2^1, ... 2^31 ].
+  // For use in clrbit immediate.
+  // Note: we are bit NOTing the value.
+  uint32_t v = ~(int32_t)N->getSExtValue();
+  // Constrain to 32 bits, and then check for a single bit.
+  return ImmIsSingleBit(v);
+}]>;
+
+def SetClr5ImmPred : PatLeaf<(i32 imm), [{
+  // SetClr5ImmPred predicate - True if the immediate is in the range 0..31.
+  int32_t v = (int32_t)N->getSExtValue();
+  return (v >= 0 && v <= 31);
+}]>;
+
+def Set4ImmPred : PatLeaf<(i32 imm), [{
+  // Set4ImmPred predicate - True if the number is in the series of values:
+  // [ 2^0, 2^1, ... 2^15 ].
+  // For use in setbit immediate.
+  uint16_t v = (int16_t)N->getSExtValue();
+  // Constrain to 16 bits, and then check for a single bit.
+  return ImmIsSingleBit(v);
+}]>;
+
+def Clr4ImmPred : PatLeaf<(i32 imm), [{
+  // Clr4ImmPred predicate - True if the number is in the series of
+  // bit negated values:
+  // [ 2^0, 2^1, ... 2^15 ].
+  // For use in setbit and clrbit immediate.
+  uint16_t v = ~(int16_t)N->getSExtValue();
+  // Constrain to 16 bits, and then check for a single bit.
+  return ImmIsSingleBit(v);
+}]>;
+
+def SetClr4ImmPred : PatLeaf<(i32 imm), [{
+  // SetClr4ImmPred predicate - True if the immediate is in the range 0..15.
+  int16_t v = (int16_t)N->getSExtValue();
+  return (v >= 0 && v <= 15);
+}]>;
+
+def Set3ImmPred : PatLeaf<(i32 imm), [{
+  // Set3ImmPred predicate - True if the number is in the series of values:
+  // [ 2^0, 2^1, ... 2^7 ].
+  // For use in setbit immediate.
+  uint8_t v = (int8_t)N->getSExtValue();
+  // Constrain to 8 bits, and then check for a single bit.
+  return ImmIsSingleBit(v);
+}]>;
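+
+// Worked example (illustrative; not in the original source): after
+// constraining the width, the SetN/ClrN predicates reduce to a single-bit
+// test; ImmIsSingleBit(v) presumably behaves like
+// (v != 0) && ((v & (v - 1)) == 0). Set5ImmPred therefore accepts 0x1,
+// 0x800 and 0x80000000 but rejects 0 and 0x3, while the ClrN predicates
+// accept the complements, e.g. Clr5ImmPred accepts 0xFFFFFFFE (= ~0x1).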
+def Clr3ImmPred : PatLeaf<(i32 imm), [{
+  // Clr3ImmPred predicate - True if the number is in the series of
+  // bit negated values:
+  // [ 2^0, 2^1, ... 2^7 ].
+  // For use in setbit and clrbit immediate.
+  uint8_t v = ~(int8_t)N->getSExtValue();
+  // Constrain to 8 bits, and then check for a single bit.
+  return ImmIsSingleBit(v);
+}]>;
+
+def SetClr3ImmPred : PatLeaf<(i32 imm), [{
+  // SetClr3ImmPred predicate - True if the immediate is in the range 0..7.
+  int8_t v = (int8_t)N->getSExtValue();
+  return (v >= 0 && v <= 7);
+}]>;
+
+
+// Extendable immediate operands.
+
+let PrintMethod = "printExtOperand" in {
+  def s16Ext : Operand<i32>;
+  def s12Ext : Operand<i32>;
+  def s10Ext : Operand<i32>;
+  def s9Ext : Operand<i32>;
+  def s8Ext : Operand<i32>;
+  def s6Ext : Operand<i32>;
+  def s11_0Ext : Operand<i32>;
+  def s11_1Ext : Operand<i32>;
+  def s11_2Ext : Operand<i32>;
+  def s11_3Ext : Operand<i32>;
+  def u6Ext : Operand<i32>;
+  def u7Ext : Operand<i32>;
+  def u8Ext : Operand<i32>;
+  def u9Ext : Operand<i32>;
+  def u10Ext : Operand<i32>;
+  def u6_0Ext : Operand<i32>;
+  def u6_1Ext : Operand<i32>;
+  def u6_2Ext : Operand<i32>;
+  def u6_3Ext : Operand<i32>;
+}
+
+let PrintMethod = "printImmOperand" in
+def u0AlwaysExt : Operand<i32>;
+
+// Predicates for constant extendable operands
+def s16ExtPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 16-bit sign extended field.
+    return isInt<16>(v);
+  else {
+    if (isInt<16>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit signed field.
+    return isConstExtProfitable(Node) && isInt<32>(v);
+  }
+}]>;
+
+def s10ExtPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 10-bit sign extended field.
+    return isInt<10>(v);
+  else {
+    if (isInt<10>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit signed field.
+    return isConstExtProfitable(Node) && isInt<32>(v);
+  }
+}]>;
+
+def s9ExtPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 9-bit sign extended field.
+    return isInt<9>(v);
+  else {
+    if (isInt<9>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit signed field.
+    return isConstExtProfitable(Node) && isInt<32>(v);
+  }
+}]>;
+
+def s8ExtPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in an 8-bit sign extended field.
+    return isInt<8>(v);
+  else {
+    if (isInt<8>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit signed field.
+    return isConstExtProfitable(Node) && isInt<32>(v);
+  }
+}]>;
+
+def s8_16ExtPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate fits in an 8-bit sign extended field.
+    return isInt<8>(v);
+  else {
+    if (isInt<8>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can't fit in a 16-bit signed field. This is required to avoid
+    // unnecessary constant extenders.
+    return isConstExtProfitable(Node) && !isInt<16>(v);
+  }
+}]>;
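+
+// Summary added for illustration (not in the original source): the
+// extendable predicates in this section all follow the same shape. Without
+// V4 operations the check is a plain field-width test; with V4 a wider value
+// may also match when attaching a constant extender is profitable. For
+// s6ExtPred below, v = 31 matches on any subtarget, while v = 70000 matches
+// only on V4+ and only when isConstExtProfitable(Node) holds. The *_16 and
+// *_10 variants additionally require that the value not fit the narrower
+// field, to avoid emitting unnecessary constant extenders.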
+def s6ExtPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 6-bit sign extended field.
+    return isInt<6>(v);
+  else {
+    if (isInt<6>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit signed field.
+    return isConstExtProfitable(Node) && isInt<32>(v);
+  }
+}]>;
+
+def s6_16ExtPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate fits in a 6-bit sign extended field.
+    return isInt<6>(v);
+  else {
+    if (isInt<6>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can't fit in a 16-bit signed field. This is required to avoid
+    // unnecessary constant extenders.
+    return isConstExtProfitable(Node) && !isInt<16>(v);
+  }
+}]>;
+
+def s6_10ExtPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 6-bit sign extended field.
+    return isInt<6>(v);
+  else {
+    if (isInt<6>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can't fit in a 10-bit signed field. This is required to avoid
+    // unnecessary constant extenders.
+    return isConstExtProfitable(Node) && !isInt<10>(v);
+  }
+}]>;
+
+def s11_0ExtPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in an 11-bit sign extended field.
+    return isShiftedInt<11,0>(v);
+  else {
+    if (isInt<11>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit signed field.
+    return isConstExtProfitable(Node) && isInt<32>(v);
+  }
+}]>;
+
+def s11_1ExtPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 12-bit sign extended field
+    // and is 2-byte aligned.
+    return isShiftedInt<11,1>(v);
+  else {
+    if (isInt<12>(v))
+      return isShiftedInt<11,1>(v);
+
+    // Return true if extending this immediate is profitable and the low 1 bit
+    // is zero (2-byte aligned).
+    return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0);
+  }
+}]>;
+
+def s11_2ExtPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 13-bit sign extended field
+    // and is 4-byte aligned.
+    return isShiftedInt<11,2>(v);
+  else {
+    if (isInt<13>(v))
+      return isShiftedInt<11,2>(v);
+
+    // Return true if extending this immediate is profitable and the low
+    // 2 bits are zero (4-byte aligned).
+    return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0);
+  }
+}]>;
+
+def s11_3ExtPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 14-bit sign extended field
+    // and is 8-byte aligned.
+    return isShiftedInt<11,3>(v);
+  else {
+    if (isInt<14>(v))
+      return isShiftedInt<11,3>(v);
+
+    // Return true if extending this immediate is profitable and the low
+    // 3 bits are zero (8-byte aligned).
+    return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0);
+  }
+}]>;
+
+def u0AlwaysExtPred : PatLeaf<(i32 imm), [{
+  // Predicate for an unsigned 32-bit value that always needs to be extended.
+ if (Subtarget.hasV4TOps()) { + if (isConstExtProfitable(Node)) { + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<32>(v); + } + } + return false; +}]>; + +def u6ExtPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (!Subtarget.hasV4TOps()) + // Return true if the immediate can fit in a 6-bit unsigned field. + return isUInt<6>(v); + else { + if (isUInt<6>(v)) + return true; + + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v); + } +}]>; + +def u7ExtPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (!Subtarget.hasV4TOps()) + // Return true if the immediate can fit in a 7-bit unsigned field. + return isUInt<7>(v); + else { + if (isUInt<7>(v)) + return true; + + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v); + } +}]>; + +def u8ExtPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (!Subtarget.hasV4TOps()) + // Return true if the immediate can fit in a 8-bit unsigned field. + return isUInt<8>(v); + else { + if (isUInt<8>(v)) + return true; + + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v); + } +}]>; + +def u9ExtPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (!Subtarget.hasV4TOps()) + // Return true if the immediate can fit in a 9-bit unsigned field. + return isUInt<9>(v); + else { + if (isUInt<9>(v)) + return true; + + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v); + } +}]>; + +def u6_1ExtPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (!Subtarget.hasV4TOps()) + // Return true if the immediate can fit in a 7-bit unsigned field and + // is 2-byte aligned. + return isShiftedUInt<6,1>(v); + else { + if (isUInt<7>(v)) + return isShiftedUInt<6,1>(v); + + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0); + } +}]>; + +def u6_2ExtPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (!Subtarget.hasV4TOps()) + // Return true if the immediate can fit in a 8-bit unsigned field and + // is 4-byte aligned. + return isShiftedUInt<6,2>(v); + else { + if (isUInt<8>(v)) + return isShiftedUInt<6,2>(v); + + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0); + } +}]>; + +def u6_3ExtPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (!Subtarget.hasV4TOps()) + // Return true if the immediate can fit in a 9-bit unsigned field and + // is 8-byte aligned. + return isShiftedUInt<6,3>(v); + else { + if (isUInt<9>(v)) + return isShiftedUInt<6,3>(v); + + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0); + } +}]>; + +// Addressing modes. 
+
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
+def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
+def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
+def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
+def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
+def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
+def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
+
+// Address operands.
+
+def MEMrr : Operand<i32> {
+  let PrintMethod = "printMEMrrOperand";
+  let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri : Operand<i32> {
+  let PrintMethod = "printMEMriOperand";
+  let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri_s11_2 : Operand<i32>,
+  ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
+  let PrintMethod = "printMEMriOperand";
+  let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+def FrameIndex : Operand<i32> {
+  let PrintMethod = "printFrameIndexOperand";
+  let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+let PrintMethod = "printGlobalOperand" in {
+  def globaladdress : Operand<i32>;
+  def globaladdressExt : Operand<i32>;
+}
+
+let PrintMethod = "printJumpTable" in
+def jumptablebase : Operand<i32>;
+
+def brtarget : Operand<OtherVT>;
+def brtargetExt : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+def bblabel : Operand<i32>;
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">;
+
+def symbolHi32 : Operand<i32> {
+  let PrintMethod = "printSymbolHi";
+}
+def symbolLo32 : Operand<i32> {
+  let PrintMethod = "printSymbolLo";
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
new file mode 100644
index 000000000000..5490ecd6e3e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -0,0 +1,346 @@
+//===-- HexagonPeephole.cpp - Hexagon Peephole Optimizations --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This peephole pass optimizes the following cases:
+// 1. Optimizes redundant sign extends.
+//    Transform the following pattern
+//    %vreg170<def> = SXTW %vreg166
+//    ...
+//    %vreg176<def> = COPY %vreg170:subreg_loreg
+//
+//    Into
+//    %vreg176<def> = COPY %vreg166
+//
+// 2. Optimizes redundant negation of predicates.
+//    %vreg15<def> = CMPGTrr %vreg6, %vreg2
+//    ...
+//    %vreg16<def> = NOT_p %vreg15<kill>
+//    ...
+//    JMP_c %vreg16<kill>, <BB#1>, %PC<imp-def,dead>
+//
+//    Into
+//    %vreg15<def> = CMPGTrr %vreg6, %vreg2;
+//    ...
+//    JMP_cNot %vreg15<kill>, <BB#1>, %PC<imp-def,dead>;
+//
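+// 3. (Noted here for illustration; handled in the code below.) Optimizes a
+//    64-bit logical shift right by 32 followed by a low-subregister copy
+//    (virtual register numbers hypothetical):
+//    %vreg1<def> = LSRd_ri %vreg0, 32
+//    ...
+//    %vreg2<def> = COPY %vreg1:subreg_loreg
+//
+//    Into
+//    %vreg2<def> = COPY %vreg0:subreg_hireg
+//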
+// Note: The peephole pass makes instructions like
+// %vreg170<def> = SXTW %vreg166 or %vreg16<def> = NOT_p %vreg15<kill>
+// redundant and relies on some form of dead-code removal, such as DCE or
+// DIE, to actually eliminate them.
+
+
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-peephole"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole",
+    cl::Hidden, cl::ZeroOrMore, cl::init(false),
+    cl::desc("Disable Peephole Optimization"));
+
+static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp",
+    cl::Hidden, cl::ZeroOrMore, cl::init(false),
+    cl::desc("Disable Optimization of PNotP"));
+
+static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext",
+    cl::Hidden, cl::ZeroOrMore, cl::init(false),
+    cl::desc("Disable Optimization of Sign/Zero Extends"));
+
+static cl::opt<bool> DisableOptExtTo64("disable-hexagon-opt-ext-to-64",
+    cl::Hidden, cl::ZeroOrMore, cl::init(false),
+    cl::desc("Disable Optimization of extensions to i64."));
+
+namespace llvm {
+  void initializeHexagonPeepholePass(PassRegistry&);
+}
+
+namespace {
+  struct HexagonPeephole : public MachineFunctionPass {
+    const HexagonInstrInfo    *QII;
+    const HexagonRegisterInfo *QRI;
+    const MachineRegisterInfo *MRI;
+
+  public:
+    static char ID;
+    HexagonPeephole() : MachineFunctionPass(ID) {
+      initializeHexagonPeepholePass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    const char *getPassName() const {
+      return "Hexagon optimize redundant zero and sign extends";
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src);
+  };
+}
+
+char HexagonPeephole::ID = 0;
+
+INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole",
+                false, false)
+
+bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
+  QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().
+                                              getInstrInfo());
+  QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget().
+                                                 getRegisterInfo());
+  MRI = &MF.getRegInfo();
+
+  DenseMap<unsigned, unsigned> PeepholeMap;
+  DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap;
+
+  if (DisableHexagonPeephole) return false;
+
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+       MBBb != MBBe; ++MBBb) {
+    MachineBasicBlock* MBB = MBBb;
+    PeepholeMap.clear();
+    PeepholeDoubleRegsMap.clear();
+
+    // Traverse the basic block.
+    for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+         ++MII) {
+      MachineInstr *MI = MII;
+      // Look for sign extends:
+      // %vreg170<def> = SXTW %vreg166
+      if (!DisableOptSZExt && MI->getOpcode() == Hexagon::SXTW) {
+        assert (MI->getNumOperands() == 2);
+        MachineOperand &Dst = MI->getOperand(0);
+        MachineOperand &Src = MI->getOperand(1);
+        unsigned DstReg = Dst.getReg();
+        unsigned SrcReg = Src.getReg();
+        // Just handle virtual registers.
+        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+          // Map the following:
+          // %vreg170<def> = SXTW %vreg166
+          // PeepholeMap[170] = vreg166
+          PeepholeMap[DstReg] = SrcReg;
+        }
+      }
+
+      // Look for %vreg170<def> = COMBINE_Ir_V4 (0, %vreg169)
+      // %vreg170:DoubleRegs, %vreg169:IntRegs
+      if (!DisableOptExtTo64 &&
+          MI->getOpcode () == Hexagon::COMBINE_Ir_V4) {
+        assert (MI->getNumOperands() == 3);
+        MachineOperand &Dst = MI->getOperand(0);
+        MachineOperand &Src1 = MI->getOperand(1);
+        MachineOperand &Src2 = MI->getOperand(2);
+        if (Src1.getImm() != 0)
+          continue;
+        unsigned DstReg = Dst.getReg();
+        unsigned SrcReg = Src2.getReg();
+        PeepholeMap[DstReg] = SrcReg;
+      }
+
+      // Look for this sequence below
+      // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32
+      // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
+      // and convert it into
+      // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg.
+      if (MI->getOpcode() == Hexagon::LSRd_ri) {
+        assert(MI->getNumOperands() == 3);
+        MachineOperand &Dst = MI->getOperand(0);
+        MachineOperand &Src1 = MI->getOperand(1);
+        MachineOperand &Src2 = MI->getOperand(2);
+        if (Src2.getImm() != 32)
+          continue;
+        unsigned DstReg = Dst.getReg();
+        unsigned SrcReg = Src1.getReg();
+        PeepholeDoubleRegsMap[DstReg] =
+          std::make_pair(SrcReg, 1/*Hexagon::subreg_hireg*/);
+      }
+
+      // Look for P = NOT(P).
+      if (!DisablePNotP &&
+          (MI->getOpcode() == Hexagon::NOT_p)) {
+        assert (MI->getNumOperands() == 2);
+        MachineOperand &Dst = MI->getOperand(0);
+        MachineOperand &Src = MI->getOperand(1);
+        unsigned DstReg = Dst.getReg();
+        unsigned SrcReg = Src.getReg();
+        // Just handle virtual registers.
+        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+          // Map the following:
+          // %vreg170<def> = NOT_xx %vreg166
+          // PeepholeMap[170] = vreg166
+          PeepholeMap[DstReg] = SrcReg;
+        }
+      }
+
+      // Look for copy:
+      // %vreg176<def> = COPY %vreg170:subreg_loreg
+      if (!DisableOptSZExt && MI->isCopy()) {
+        assert (MI->getNumOperands() == 2);
+        MachineOperand &Dst = MI->getOperand(0);
+        MachineOperand &Src = MI->getOperand(1);
+
+        // Make sure we are copying the lower 32 bits.
+        if (Src.getSubReg() != Hexagon::subreg_loreg)
+          continue;
+
+        unsigned DstReg = Dst.getReg();
+        unsigned SrcReg = Src.getReg();
+        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+          // Try to find it in the map.
+          if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) {
+            // Change the 1st operand.
+            MI->RemoveOperand(1);
+            MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
+          } else {
+            DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI =
+              PeepholeDoubleRegsMap.find(SrcReg);
+            if (DI != PeepholeDoubleRegsMap.end()) {
+              std::pair<unsigned,unsigned> PeepholeSrc = DI->second;
+              MI->RemoveOperand(1);
+              MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first,
+                                                       false /*isDef*/,
+                                                       false /*isImp*/,
+                                                       false /*isKill*/,
+                                                       false /*isDead*/,
+                                                       false /*isUndef*/,
+                                                       false /*isEarlyClobber*/,
+                                                       PeepholeSrc.second));
+            }
+          }
+        }
+      }
+
+      // Look for predicated instructions.
+      if (!DisablePNotP) {
+        bool Done = false;
+        if (QII->isPredicated(MI)) {
+          MachineOperand &Op0 = MI->getOperand(0);
+          unsigned Reg0 = Op0.getReg();
+          const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0);
+          if (RC0->getID() == Hexagon::PredRegsRegClassID) {
+            // Handle instructions that have a predicate register in op0
+            // (most cases of predicable instructions).
+            if (TargetRegisterInfo::isVirtualRegister(Reg0)) {
+              // Try to find it in the map.
+              if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) {
+                // Change the 1st operand and flip the opcode.
+                MI->getOperand(0).setReg(PeepholeSrc);
+                int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode());
+                MI->setDesc(QII->get(NewOp));
+                Done = true;
+              }
+            }
+          }
+        }
+
+        if (!Done) {
+          // Handle special instructions.
+          unsigned Op = MI->getOpcode();
+          unsigned NewOp = 0;
+          unsigned PR = 1, S1 = 2, S2 = 3;   // Operand indices.
+
+          switch (Op) {
+            case Hexagon::TFR_condset_rr:
+            case Hexagon::TFR_condset_ii:
+            case Hexagon::MUX_ii:
+            case Hexagon::MUX_rr:
+              NewOp = Op;
+              break;
+            case Hexagon::TFR_condset_ri:
+              NewOp = Hexagon::TFR_condset_ir;
+              break;
+            case Hexagon::TFR_condset_ir:
+              NewOp = Hexagon::TFR_condset_ri;
+              break;
+            case Hexagon::MUX_ri:
+              NewOp = Hexagon::MUX_ir;
+              break;
+            case Hexagon::MUX_ir:
+              NewOp = Hexagon::MUX_ri;
+              break;
+          }
+          if (NewOp) {
+            unsigned PSrc = MI->getOperand(PR).getReg();
+            if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
+              MI->getOperand(PR).setReg(POrig);
+              MI->setDesc(QII->get(NewOp));
+              // Swap operands S1 and S2.
+              MachineOperand Op1 = MI->getOperand(S1);
+              MachineOperand Op2 = MI->getOperand(S2);
+              ChangeOpInto(MI->getOperand(S1), Op2);
+              ChangeOpInto(MI->getOperand(S2), Op1);
+            }
+          } // if (NewOp)
+        } // if (!Done)
+
+      } // if (!DisablePNotP)
+
+    } // Instruction
+  } // Basic Block
+  return true;
+}
+
+void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) {
+  assert (&Dst != &Src && "Cannot duplicate into itself");
+  switch (Dst.getType()) {
+    case MachineOperand::MO_Register:
+      if (Src.isReg()) {
+        Dst.setReg(Src.getReg());
+      } else if (Src.isImm()) {
+        Dst.ChangeToImmediate(Src.getImm());
+      } else {
+        llvm_unreachable("Unexpected src operand type");
+      }
+      break;
+
+    case MachineOperand::MO_Immediate:
+      if (Src.isImm()) {
+        Dst.setImm(Src.getImm());
+      } else if (Src.isReg()) {
+        Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(),
+                             Src.isKill(), Src.isDead(), Src.isUndef(),
+                             Src.isDebug());
+      } else {
+        llvm_unreachable("Unexpected src operand type");
+      }
+      break;
+
+    default:
+      llvm_unreachable("Unexpected dst operand type");
+      break;
+  }
+}
+
+FunctionPass *llvm::createHexagonPeephole() {
+  return new HexagonPeephole();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
new file mode 100644
index 000000000000..1786e9daa306
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -0,0 +1,299 @@
+//===-- HexagonRegisterInfo.cpp - Hexagon Register Information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+// +//===----------------------------------------------------------------------===// + +#include "HexagonRegisterInfo.h" +#include "Hexagon.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonMachineFunctionInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + + +HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st) + : HexagonGenRegisterInfo(Hexagon::R31), + Subtarget(st) { +} + +const uint16_t* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction + *MF) + const { + static const uint16_t CalleeSavedRegsV2[] = { + Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 + }; + static const uint16_t CalleeSavedRegsV3[] = { + Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, + Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 + }; + + switch(Subtarget.getHexagonArchVersion()) { + case HexagonSubtarget::V1: + break; + case HexagonSubtarget::V2: + return CalleeSavedRegsV2; + case HexagonSubtarget::V3: + case HexagonSubtarget::V4: + case HexagonSubtarget::V5: + return CalleeSavedRegsV3; + } + llvm_unreachable("Callee saved registers requested for unknown architecture " + "version"); +} + +BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) + const { + BitVector Reserved(getNumRegs()); + Reserved.set(HEXAGON_RESERVED_REG_1); + Reserved.set(HEXAGON_RESERVED_REG_2); + Reserved.set(Hexagon::R29); + Reserved.set(Hexagon::R30); + Reserved.set(Hexagon::R31); + Reserved.set(Hexagon::D14); + Reserved.set(Hexagon::D15); + Reserved.set(Hexagon::LC0); + Reserved.set(Hexagon::LC1); + Reserved.set(Hexagon::SA0); + Reserved.set(Hexagon::SA1); + return Reserved; +} + + +const TargetRegisterClass* const* +HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = { + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + }; + static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = { + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + }; + + switch(Subtarget.getHexagonArchVersion()) { + case HexagonSubtarget::V1: + break; + case HexagonSubtarget::V2: + return CalleeSavedRegClassesV2; + case HexagonSubtarget::V3: + case HexagonSubtarget::V4: + case HexagonSubtarget::V5: + return CalleeSavedRegClassesV3; + } + llvm_unreachable("Callee saved register classes requested for unknown " + "architecture version"); +} + +void 
HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                         int SPAdj, unsigned FIOperandNum,
+                                         RegScavenger *RS) const {
+  //
+  // Hexagon_TODO: Do we need to enforce this for Hexagon?
+  assert(SPAdj == 0 && "Unexpected");
+
+  MachineInstr &MI = *II;
+  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+
+  // Addressable stack objects are accessed using negative offsets from %fp.
+  MachineFunction &MF = *MI.getParent()->getParent();
+  const HexagonInstrInfo &TII =
+    *static_cast<const HexagonInstrInfo*>(MF.getTarget().getInstrInfo());
+  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+  unsigned FrameReg = getFrameRegister(MF);
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  if (!TFI->hasFP(MF)) {
+    // We will not reserve space on the stack for the lr and fp registers.
+    Offset -= 2 * Hexagon_WordSize;
+  }
+
+  const unsigned FrameSize = MFI.getStackSize();
+
+  if (!MFI.hasVarSizedObjects() &&
+      TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
+      !TII.isSpillPredRegOp(&MI)) {
+    // Replace frame index with a stack pointer reference.
+    MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), false,
+                                                 false, true);
+    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(FrameSize+Offset);
+  } else {
+    // Replace frame index with a frame pointer reference.
+    if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
+
+      // If the offset overflows, then correct it.
+      //
+      // For loads, we do not need a reserved register; change
+      //   r0 = memw(r30 + #10000)
+      // to:
+      //   r0 = add(r30, #10000)
+      //   r0 = memw(r0)
+      if ( (MI.getOpcode() == Hexagon::LDriw) ||
+           (MI.getOpcode() == Hexagon::LDrid) ||
+           (MI.getOpcode() == Hexagon::LDrih) ||
+           (MI.getOpcode() == Hexagon::LDriuh) ||
+           (MI.getOpcode() == Hexagon::LDrib) ||
+           (MI.getOpcode() == Hexagon::LDriub) ||
+           (MI.getOpcode() == Hexagon::LDriw_f) ||
+           (MI.getOpcode() == Hexagon::LDrid_f)) {
+        unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ?
+          getSubReg(MI.getOperand(0).getReg(), Hexagon::subreg_loreg) :
+          MI.getOperand(0).getReg();
+
+        // Check if the offset can fit in an add-immediate (ADD_ri).
+        if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                  TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                  TII.get(Hexagon::ADD_rr),
+                  dstReg).addReg(FrameReg).addReg(dstReg);
+        } else {
+          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                  TII.get(Hexagon::ADD_ri),
+                  dstReg).addReg(FrameReg).addImm(Offset);
+        }
+
+        MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,
+                                                     true);
+        MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+      } else if ((MI.getOpcode() == Hexagon::STriw_indexed) ||
+                 (MI.getOpcode() == Hexagon::STriw) ||
+                 (MI.getOpcode() == Hexagon::STrid) ||
+                 (MI.getOpcode() == Hexagon::STrih) ||
+                 (MI.getOpcode() == Hexagon::STrib) ||
+                 (MI.getOpcode() == Hexagon::STrid_f) ||
+                 (MI.getOpcode() == Hexagon::STriw_f)) {
+        // For stores, we need a reserved register. Change
+        //   memw(r30 + #10000) = r0
+        // to:
+        //   rs = add(r30, #10000);
+        //   memw(rs) = r0
+        unsigned resReg = HEXAGON_RESERVED_REG_1;
+
+        // Check if the offset can fit in an add-immediate (ADD_ri).
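+        // (If it does not, the offset is first materialized into the
+        //  reserved scratch register with CONST32_Int_Real and then added
+        //  to the frame pointer; see HEXAGON_RESERVED_REG_1 in
+        //  HexagonRegisterInfo.h.)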
+        if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                  TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
+          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                  TII.get(Hexagon::ADD_rr),
+                  resReg).addReg(FrameReg).addReg(resReg);
+        } else {
+          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                  TII.get(Hexagon::ADD_ri),
+                  resReg).addReg(FrameReg).addImm(Offset);
+        }
+        MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,
+                                                     true);
+        MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+      } else if (TII.isMemOp(&MI)) {
+        // Use the constant extender if the instruction provides it and the
+        // subtarget is V4 or later.
+        if (Subtarget.hasV4TOps()) {
+          if (TII.isConstExtended(&MI)) {
+            MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+            MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+            TII.immediateExtend(&MI);
+          } else {
+            llvm_unreachable("Need to implement for memops");
+          }
+        } else {
+          // Only V3 and older instructions here.
+          unsigned ResReg = HEXAGON_RESERVED_REG_1;
+          if (!MFI.hasVarSizedObjects() &&
+              TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
+            MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(),
+                                                         false, false, false);
+            MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset);
+          } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+            BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                    TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset);
+            BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                    TII.get(Hexagon::ADD_rr), ResReg).addReg(FrameReg).
+                    addReg(ResReg);
+            MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
+                                                         true);
+            MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+          } else {
+            BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                    TII.get(Hexagon::ADD_ri), ResReg).addReg(FrameReg).
+                    addImm(Offset);
+            MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
+                                                         true);
+            MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+          }
+        }
+      } else {
+        unsigned dstReg = MI.getOperand(0).getReg();
+        BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+        BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                TII.get(Hexagon::ADD_rr),
+                dstReg).addReg(FrameReg).addReg(dstReg);
+        // TODO: Can we delete MI?  It is now equivalent to r2 = add(r2, #0).
+        MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,
+                                                     true);
+        MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+      }
+    } else {
+      // If the offset is small enough to fit in the immediate field, directly
+      // encode it.
+      MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+      MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+    }
+  }
+
+}
+
+unsigned HexagonRegisterInfo::getRARegister() const {
+  return Hexagon::R31;
+}
+
+unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
+                                               &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  if (TFI->hasFP(MF)) {
+    return Hexagon::R30;
+  }
+
+  return Hexagon::R29;
+}
+
+unsigned HexagonRegisterInfo::getFrameRegister() const {
+  return Hexagon::R30;
+}
+
+unsigned HexagonRegisterInfo::getStackRegister() const {
+  return Hexagon::R29;
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "HexagonGenRegisterInfo.inc"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
new file mode 100644
index 000000000000..89af7c38cc6d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -0,0 +1,85 @@
+//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonREGISTERINFO_H
+#define HexagonREGISTERINFO_H
+
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "HexagonGenRegisterInfo.inc"
+
+//
+// We try not to hard-code the reserved registers in our code, so the
+// following two macros are defined. However, there are still a few places
+// where R10 and R11 are hard-wired. If, in the future, we decide to change
+// the reserved registers, don't forget to update the following places:
+//
+//  1. the "Defs" set of STriw_pred in HexagonInstrInfo.td
+//  2. the "Defs" set of LDri_pred in HexagonInstrInfo.td
+//  3. the definition of "IntRegs" in HexagonRegisterInfo.td
+//  4. the definition of "DoubleRegs" in HexagonRegisterInfo.td
+//
+#define HEXAGON_RESERVED_REG_1 Hexagon::R10
+#define HEXAGON_RESERVED_REG_2 Hexagon::R11
+
+namespace llvm {
+
+class HexagonSubtarget;
+class HexagonInstrInfo;
+class Type;
+
+struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
+  HexagonSubtarget &Subtarget;
+
+  HexagonRegisterInfo(HexagonSubtarget &st);
+
+  /// Code Generation virtual methods...
+  const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+  const TargetRegisterClass* const* getCalleeSavedRegClasses(
+                                     const MachineFunction *MF = 0) const;
+
+  BitVector getReservedRegs(const MachineFunction &MF) const;
+
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, unsigned FIOperandNum,
+                           RegScavenger *RS = NULL) const;
+
+  /// determineFrameLayout - Determine the size of the frame and maximum call
+  /// frame size.
+  void determineFrameLayout(MachineFunction &MF) const;
+
+  /// requiresRegisterScavenging - returns true since we may need scavenging
+  /// for a temporary register when generating hardware loop instructions.
+  bool requiresRegisterScavenging(const MachineFunction &MF) const {
+    return true;
+  }
+
+  bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+    return true;
+  }
+
+  // Debug information queries.
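+  // (R31 holds the return address; getFrameRegister returns R30 when a
+  //  frame pointer is present and the stack pointer R29 otherwise, per the
+  //  definitions in HexagonRegisterInfo.cpp above.)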
+ unsigned getRARegister() const; + unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getFrameRegister() const; + unsigned getStackRegister() const; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td new file mode 100644 index 000000000000..8ea1b7e75db7 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -0,0 +1,167 @@ +//===-- HexagonRegisterInfo.td - Hexagon Register defs -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the Hexagon register file. +//===----------------------------------------------------------------------===// + +let Namespace = "Hexagon" in { + + class HexagonReg<string n> : Register<n> { + field bits<5> Num; + } + + class HexagonDoubleReg<string n, list<Register> subregs> : + RegisterWithSubRegs<n, subregs> { + field bits<5> Num; + } + + // Registers are identified with 5-bit ID numbers. + // Ri - 32-bit integer registers. + class Ri<bits<5> num, string n> : HexagonReg<n> { + let Num = num; + } + + // Rf - 32-bit floating-point registers. + class Rf<bits<5> num, string n> : HexagonReg<n> { + let Num = num; + } + + + // Rd - 64-bit registers. + class Rd<bits<5> num, string n, list<Register> subregs> : + HexagonDoubleReg<n, subregs> { + let Num = num; + let SubRegs = subregs; + } + + // Rp - predicate registers + class Rp<bits<5> num, string n> : HexagonReg<n> { + let Num = num; + } + + // Rc - control registers + class Rc<bits<5> num, string n> : HexagonReg<n> { + let Num = num; + } + + // Rj - aliased integer registers + class Rj<string n, Ri R>: HexagonReg<n> { + let Num = R.Num; + let Aliases = [R]; + } + + def subreg_loreg : SubRegIndex<32>; + def subreg_hireg : SubRegIndex<32, 32>; + + // Integer registers. 
+ def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>; + def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>; + def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>; + def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>; + def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>; + def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>; + def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>; + def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>; + def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>; + def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>; + def R10 : Ri<10, "r10">, DwarfRegNum<[10]>; + def R11 : Ri<11, "r11">, DwarfRegNum<[11]>; + def R12 : Ri<12, "r12">, DwarfRegNum<[12]>; + def R13 : Ri<13, "r13">, DwarfRegNum<[13]>; + def R14 : Ri<14, "r14">, DwarfRegNum<[14]>; + def R15 : Ri<15, "r15">, DwarfRegNum<[15]>; + def R16 : Ri<16, "r16">, DwarfRegNum<[16]>; + def R17 : Ri<17, "r17">, DwarfRegNum<[17]>; + def R18 : Ri<18, "r18">, DwarfRegNum<[18]>; + def R19 : Ri<19, "r19">, DwarfRegNum<[19]>; + def R20 : Ri<20, "r20">, DwarfRegNum<[20]>; + def R21 : Ri<21, "r21">, DwarfRegNum<[21]>; + def R22 : Ri<22, "r22">, DwarfRegNum<[22]>; + def R23 : Ri<23, "r23">, DwarfRegNum<[23]>; + def R24 : Ri<24, "r24">, DwarfRegNum<[24]>; + def R25 : Ri<25, "r25">, DwarfRegNum<[25]>; + def R26 : Ri<26, "r26">, DwarfRegNum<[26]>; + def R27 : Ri<27, "r27">, DwarfRegNum<[27]>; + def R28 : Ri<28, "r28">, DwarfRegNum<[28]>; + def R29 : Ri<29, "r29">, DwarfRegNum<[29]>; + def R30 : Ri<30, "r30">, DwarfRegNum<[30]>; + def R31 : Ri<31, "r31">, DwarfRegNum<[31]>; + + def SP : Rj<"sp", R29>, DwarfRegNum<[29]>; + def FP : Rj<"fp", R30>, DwarfRegNum<[30]>; + def LR : Rj<"lr", R31>, DwarfRegNum<[31]>; + + // Aliases of the R* registers used to hold 64-bit int values (doubles). + let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>; + def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>; + def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>; + def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>; + def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>; + def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>; + def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>; + def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>; + def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>; + def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>; + def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>; + def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>; + def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>; + def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>; + def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>; + def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>; + } + + // Predicate registers. + def P0 : Rp<0, "p0">, DwarfRegNum<[63]>; + def P1 : Rp<1, "p1">, DwarfRegNum<[64]>; + def P2 : Rp<2, "p2">, DwarfRegNum<[65]>; + def P3 : Rp<3, "p3">, DwarfRegNum<[66]>; + + // Control registers. + def SA0 : Rc<0, "sa0">, DwarfRegNum<[67]>; + def LC0 : Rc<1, "lc0">, DwarfRegNum<[68]>; + + def SA1 : Rc<2, "sa1">, DwarfRegNum<[69]>; + def LC1 : Rc<3, "lc1">, DwarfRegNum<[70]>; + + def M0 : Rc<6, "m0">, DwarfRegNum<[71]>; + def M1 : Rc<7, "m1">, DwarfRegNum<[72]>; + + def PC : Rc<9, "pc">, DwarfRegNum<[32]>; // is the Dwarf number correct? + def GP : Rc<11, "gp">, DwarfRegNum<[33]>; // is the Dwarf number correct? +} + +// Register classes. +// +// FIXME: the register order should be defined in terms of the preferred +// allocation order... 
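+// (As written below, caller-saved registers R0-R9 and R12-R28 are listed
+// first so the allocator prefers them; the reserved R10/R11 and the stack,
+// frame, and link registers R29-R31 come last.)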
+// +def IntRegs : RegisterClass<"Hexagon", [i32,f32], 32, + (add (sequence "R%u", 0, 9), + (sequence "R%u", 12, 28), + R10, R11, R29, R30, R31)> { +} + +def DoubleRegs : RegisterClass<"Hexagon", [i64,f64], 64, + (add (sequence "D%u", 0, 4), + (sequence "D%u", 6, 13), D5, D14, D15)>; + + +def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))> +{ + let Size = 32; +} + +def CRRegs : RegisterClass<"Hexagon", [i32], 32, + (add (sequence "LC%u", 0, 1), + (sequence "SA%u", 0, 1), + (sequence "M%u", 0, 1), PC, GP)> { + let Size = 32; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp new file mode 100644 index 000000000000..44234e85dc69 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -0,0 +1,89 @@ +//===- HexagonRemoveExtendArgs.cpp - Remove unnecessary argument sign extends // +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pass that removes sign extends for function parameters. These parameters +// are already sign extended by the caller per Hexagon's ABI +// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +namespace llvm { + void initializeHexagonRemoveExtendArgsPass(PassRegistry&); +} + +namespace { + struct HexagonRemoveExtendArgs : public FunctionPass { + public: + static char ID; + HexagonRemoveExtendArgs() : FunctionPass(ID) { + initializeHexagonRemoveExtendArgsPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnFunction(Function &F); + + const char *getPassName() const { + return "Remove sign extends"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineFunctionAnalysis>(); + AU.addPreserved<MachineFunctionAnalysis>(); + FunctionPass::getAnalysisUsage(AU); + } + }; +} + +char HexagonRemoveExtendArgs::ID = 0; + +INITIALIZE_PASS(HexagonRemoveExtendArgs, "reargs", + "Remove Sign and Zero Extends for Args", false, false) + +bool HexagonRemoveExtendArgs::runOnFunction(Function &F) { + unsigned Idx = 1; + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; + ++AI, ++Idx) { + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) { + Argument* Arg = AI; + if (!isa<PointerType>(Arg->getType())) { + for (Instruction::use_iterator UI = Arg->use_begin(); + UI != Arg->use_end();) { + if (isa<SExtInst>(*UI)) { + Instruction* Use = cast<Instruction>(*UI); + SExtInst* SI = new SExtInst(Arg, Use->getType()); + assert (EVT::getEVT(SI->getType()) == + (EVT::getEVT(Use->getType()))); + ++UI; + Use->replaceAllUsesWith(SI); + Instruction* First = F.getEntryBlock().begin(); + SI->insertBefore(First); + Use->eraseFromParent(); + } else { + ++UI; + } + } + } + } + } + return true; +} + + + +FunctionPass* +llvm::createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM) { + return new HexagonRemoveExtendArgs(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td new file mode 100644 index 000000000000..c2cfbb9710a6 --- /dev/null 
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td @@ -0,0 +1,69 @@ +//===- HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Functional Units +def LSUNIT : FuncUnit; // SLOT0 +def LUNIT : FuncUnit; // SLOT1 +def MUNIT : FuncUnit; // SLOT2 +def SUNIT : FuncUnit; // SLOT3 +def LOOPUNIT : FuncUnit; + +// Itinerary classes +def ALU32 : InstrItinClass; +def ALU64 : InstrItinClass; +def CR : InstrItinClass; +def J : InstrItinClass; +def JR : InstrItinClass; +def LD : InstrItinClass; +def LD0 : InstrItinClass; +def M : InstrItinClass; +def ST : InstrItinClass; +def ST0 : InstrItinClass; +def S : InstrItinClass; +def SYS : InstrItinClass; +def ENDLOOP : InstrItinClass; +def PSEUDO : InstrItinClass; +def PSEUDOM : InstrItinClass; + +def HexagonItineraries : + ProcessorItineraries<[LSUNIT, LUNIT, MUNIT, SUNIT, LOOPUNIT], [], [ + InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>, + InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>, + InstrItinData<CR , [InstrStage<1, [SUNIT]>]>, + InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>, + InstrItinData<JR , [InstrStage<1, [MUNIT]>]>, + InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>, + InstrItinData<LD0 , [InstrStage<1, [LSUNIT]>]>, + InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>, + InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>, + InstrItinData<ST0 , [InstrStage<1, [LSUNIT]>]>, + InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>, + InstrItinData<SYS , [InstrStage<1, [LSUNIT]>]>, + InstrItinData<ENDLOOP, [InstrStage<1, [LOOPUNIT]>]>, + InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>, + InstrItinData<PSEUDOM, [InstrStage<1, [MUNIT, SUNIT], 0>, + InstrStage<1, [MUNIT, SUNIT]>]> + ]>; + +def HexagonModel : SchedMachineModel { + // Max issue per cycle == bundle width. + let IssueWidth = 4; + let Itineraries = HexagonItineraries; + let LoadLatency = 1; +} + +//===----------------------------------------------------------------------===// +// V4 Machine Info + +//===----------------------------------------------------------------------===// + +include "HexagonScheduleV4.td" + +//===----------------------------------------------------------------------===// +// V4 Machine Info - +//===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td new file mode 100644 index 000000000000..ef72cf4068bf --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td @@ -0,0 +1,74 @@ +//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// There are four SLOTS (four parallel pipelines) in Hexagon V4 machine. +// This file describes that machine information. 
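+// The table below summarizes which instruction classes may be issued in
+// each of the four slots.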
+
+//
+// |===========|==================================================|
+// | PIPELINE  | Instruction Classes                              |
+// |===========|==================================================|
+// | SLOT0     |  LD    ST    ALU32    MEMOP    NV    SYSTEM      |
+// |-----------|--------------------------------------------------|
+// | SLOT1     |  LD    ST    ALU32                               |
+// |-----------|--------------------------------------------------|
+// | SLOT2     |  XTYPE       ALU32    J        JR                |
+// |-----------|--------------------------------------------------|
+// | SLOT3     |  XTYPE       ALU32    J        CR                |
+// |===========|==================================================|
+
+// Functional Units.
+def SLOT0       : FuncUnit;
+def SLOT1       : FuncUnit;
+def SLOT2       : FuncUnit;
+def SLOT3       : FuncUnit;
+// Endloop is a pseudo instruction that is encoded with 2 bits in a packet
+// rather than taking an execution slot. This special unit is needed
+// to schedule an ENDLOOP with 4 other instructions.
+def SLOT_ENDLOOP: FuncUnit;
+
+// Itinerary classes.
+def NV_V4   : InstrItinClass;
+def MEM_V4  : InstrItinClass;
+// ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
+def PREFIX  : InstrItinClass;
+
+def HexagonItinerariesV4 :
+      ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
+        InstrItinData<ALU32  , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+        InstrItinData<ALU64  , [InstrStage<1, [SLOT2, SLOT3]>]>,
+        InstrItinData<CR     , [InstrStage<1, [SLOT3]>]>,
+        InstrItinData<J      , [InstrStage<1, [SLOT2, SLOT3]>]>,
+        InstrItinData<JR     , [InstrStage<1, [SLOT2]>]>,
+        InstrItinData<LD     , [InstrStage<1, [SLOT0, SLOT1]>]>,
+        InstrItinData<LD0    , [InstrStage<1, [SLOT0]>]>,
+        InstrItinData<M      , [InstrStage<1, [SLOT2, SLOT3]>]>,
+        InstrItinData<ST     , [InstrStage<1, [SLOT0, SLOT1]>]>,
+        InstrItinData<ST0    , [InstrStage<1, [SLOT0]>]>,
+        InstrItinData<S      , [InstrStage<1, [SLOT2, SLOT3]>]>,
+        InstrItinData<SYS    , [InstrStage<1, [SLOT0]>]>,
+        InstrItinData<NV_V4  , [InstrStage<1, [SLOT0]>]>,
+        InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
+        InstrItinData<ENDLOOP, [InstrStage<1, [SLOT_ENDLOOP]>]>,
+        InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+        InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+        InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+                                InstrStage<1, [SLOT2, SLOT3]>]>
+      ]>;
+
+def HexagonModelV4 : SchedMachineModel {
+  // Max issue per cycle == bundle width.
+  let IssueWidth = 4;
+  let Itineraries = HexagonItinerariesV4;
+  let LoadLatency = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V4 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td
new file mode 100644
index 000000000000..d8feb89c0ab5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td
@@ -0,0 +1,121 @@
+//===-- HexagonSelectCCInfo.td - Selectcc mappings ---------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//
+// selectcc mappings.
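+//
+// Each mapping below lowers (selectcc lhs, rhs, tval, fval, CC) into a
+// compare that defines a predicate register, followed by a MUX_rr that
+// selects between the two source values.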
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETEQ)),
+      (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETNE)),
+      (i32 (MUX_rr (i1 (NOT_p (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETGT)),
+      (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETUGT)),
+      (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETULT)),
+      (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs,
+                                        (ADD_ri IntRegs:$rhs, -1)))),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETLT)),
+      (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs,
+                                       (ADD_ri IntRegs:$rhs, -1)))),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETLE)),
+      (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETULE)),
+      (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for greater-equal-to Rs => greater-than Rs-1.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETGE)),
+      (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETUGE)),
+      (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+//
+// selectcc mappings for predicate comparisons.
+//
+// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into:
+//   pt = not(p0 xor p1)
+//   Rd = mux(pt, true_val, false_val)
+// and similarly for SETNE.
+//
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETNE)),
+      (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval,
+                   IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETEQ)),
+      (i32 (MUX_rr (i1 (NOT_p (XOR_pp PredRegs:$lhs, PredRegs:$rhs))),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for 64-bit operands are messy. Hexagon does not have a
+// MUX64 op, so use this instead:
+//   selectcc(Rss, Rdd, tval, fval, cond) ->
+//     combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi),
+//             mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo))
+
+// setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+                         DoubleRegs:$fval, SETGT)),
+      (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+                          (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+                          (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+                  (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+                          (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+                          (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
+
+
+// setlt-64 -> setgt-64.
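+// (This reuses the rhs-1 rewrite from the SETGE/SETUGE mappings above:
+// TFRI64 materializes the 64-bit constant -1, which ADD64_rr folds into
+// the right-hand operand before the CMPGT64rr compare.)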
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval, + DoubleRegs:$fval, SETLT)), + (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, + (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)), + (MUX_rr (CMPGT64rr DoubleRegs:$lhs, + (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp new file mode 100644 index 000000000000..c37bf9f0e800 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -0,0 +1,46 @@ +//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the HexagonSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-selectiondag-info" +#include "HexagonTargetMachine.h" +using namespace llvm; + +bool llvm::flag_aligned_memcpy; + +HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine + &TM) + : TargetSelectionDAGInfo(TM) { +} + +HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() { +} + +SDValue +HexagonSelectionDAGInfo:: +EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + flag_aligned_memcpy = false; + if ((Align & 0x3) == 0) { + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + uint64_t SizeVal = ConstantSize->getZExtValue(); + if ((SizeVal > 32) && ((SizeVal % 8) == 0)) + flag_aligned_memcpy = true; + } + } + + return SDValue(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h new file mode 100644 index 000000000000..31f278a18574 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -0,0 +1,40 @@ +//===-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Hexagon subclass for TargetSelectionDAGInfo. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonSELECTIONDAGINFO_H
+#define HexagonSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class HexagonTargetMachine;
+
+class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+  explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM);
+  ~HexagonSelectionDAGInfo();
+
+  virtual
+  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
+                                  SDValue Chain,
+                                  SDValue Dst, SDValue Src,
+                                  SDValue Size, unsigned Align,
+                                  bool isVolatile, bool AlwaysInline,
+                                  MachinePointerInfo DstPtrInfo,
+                                  MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
new file mode 100644
index 000000000000..5166f8e1748c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -0,0 +1,174 @@
+//=== HexagonSplitConst32AndConst64.cpp - split CONST32/Const64 into HI/LO ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// When the compiler is invoked with no small data, for instance with the
+// -G0 command line option, all CONST32_* opcodes should be broken down into
+// appropriate LO and HI instructions. This splitting is done by this pass.
+// The only reason this is not done in the DAG lowering itself is that there
+// is no simple way of getting the register allocator to allot the same hard
+// register to the result of LO and HI instructions. This pass is always
+// scheduled after register allocation.
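+//
+// For example, after this pass a 32-bit constant transfer such as
+//   r2 = CONST32(#0x12345678)
+// is emitted as roughly the pair
+//   r2.l = #0x5678    (LO)
+//   r2.h = #0x1234    (HI)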
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "xfer" + +#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include <map> + +using namespace llvm; + +namespace { + +class HexagonSplitConst32AndConst64 : public MachineFunctionPass { + const HexagonTargetMachine& QTM; + const HexagonSubtarget &QST; + + public: + static char ID; + HexagonSplitConst32AndConst64(const HexagonTargetMachine& TM) + : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {} + + const char *getPassName() const { + return "Hexagon Split Const32s and Const64s"; + } + bool runOnMachineFunction(MachineFunction &Fn); +}; + + +char HexagonSplitConst32AndConst64::ID = 0; + + +bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { + + const TargetInstrInfo *TII = QTM.getInstrInfo(); + + // Loop over all of the basic blocks + for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock* MBB = MBBb; + // Traverse the basic block + MachineBasicBlock::iterator MII = MBB->begin(); + MachineBasicBlock::iterator MIE = MBB->end (); + while (MII != MIE) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (Opc == Hexagon::CONST32_set) { + int DestReg = MI->getOperand(0).getReg(); + MachineOperand &Symbol = MI->getOperand (1); + + BuildMI (*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::LO), DestReg).addOperand(Symbol); + BuildMI (*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::HI), DestReg).addOperand(Symbol); + // MBB->erase returns the iterator to the next instruction, which is the + // one we want to process next + MII = MBB->erase (MI); + continue; + } + else if (Opc == Hexagon::CONST32_set_jt) { + int DestReg = MI->getOperand(0).getReg(); + MachineOperand &Symbol = MI->getOperand (1); + + BuildMI (*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::LO_jt), DestReg).addOperand(Symbol); + BuildMI (*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::HI_jt), DestReg).addOperand(Symbol); + // MBB->erase returns the iterator to the next instruction, which is the + // one we want to process next + MII = MBB->erase (MI); + continue; + } + else if (Opc == Hexagon::CONST32_Label) { + int DestReg = MI->getOperand(0).getReg(); + MachineOperand &Symbol = MI->getOperand (1); + + BuildMI (*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::LO_label), DestReg).addOperand(Symbol); + BuildMI (*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::HI_label), DestReg).addOperand(Symbol); + // MBB->erase returns the iterator to the next instruction, which is the + // one we want to process next + MII = MBB->erase (MI); + continue; + } + else if (Opc == Hexagon::CONST32_Int_Real) { + int DestReg = 
MI->getOperand(0).getReg();
+        int64_t ImmValue = MI->getOperand(1).getImm ();
+
+        BuildMI (*MBB, MII, MI->getDebugLoc(),
+                 TII->get(Hexagon::LOi), DestReg).addImm(ImmValue);
+        BuildMI (*MBB, MII, MI->getDebugLoc(),
+                 TII->get(Hexagon::HIi), DestReg).addImm(ImmValue);
+        MII = MBB->erase (MI);
+        continue;
+      }
+      else if (Opc == Hexagon::CONST64_Int_Real) {
+        int DestReg = MI->getOperand(0).getReg();
+        int64_t ImmValue = MI->getOperand(1).getImm ();
+        unsigned DestLo =
+          QTM.getRegisterInfo()->getSubReg (DestReg, Hexagon::subreg_loreg);
+        unsigned DestHi =
+          QTM.getRegisterInfo()->getSubReg (DestReg, Hexagon::subreg_hireg);
+
+        int32_t LowWord = (ImmValue & 0xFFFFFFFF);
+        int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF;
+
+        // Low sub-register, low half-word.
+        BuildMI (*MBB, MII, MI->getDebugLoc(),
+                 TII->get(Hexagon::LOi), DestLo).addImm(LowWord);
+        // Low sub-register, high half-word.
+        BuildMI (*MBB, MII, MI->getDebugLoc(),
+                 TII->get(Hexagon::HIi), DestLo).addImm(LowWord);
+        // High sub-register, low half-word.
+        BuildMI (*MBB, MII, MI->getDebugLoc(),
+                 TII->get(Hexagon::LOi), DestHi).addImm(HighWord);
+        // High sub-register, high half-word.
+        BuildMI (*MBB, MII, MI->getDebugLoc(),
+                 TII->get(Hexagon::HIi), DestHi).addImm(HighWord);
+        MII = MBB->erase (MI);
+        continue;
+      }
+      ++MII;
+    }
+  }
+
+  return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *
+llvm::createHexagonSplitConst32AndConst64(const HexagonTargetMachine &TM) {
+  return new HexagonSplitConst32AndConst64(TM);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
new file mode 100644
index 000000000000..8608e082f9df
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -0,0 +1,235 @@
+//===-- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//
+//===----------------------------------------------------------------------===//
+// This pass tries to provide opportunities for better optimization of muxes.
+// The default code generated for something like: flag = (a == b) ? 1 : 3;
+// would be:
+//
+//   {p0 = cmp.eq(r0,r1)}
+//   {r3 = mux(p0,#1,#3)}
+//
+// This requires two packets. If we use .new predicated immediate transfers,
+// then we can do this in a single packet, e.g.:
+//
+//   {p0 = cmp.eq(r0,r1)
+//    if (p0.new) r3 = #1
+//    if (!p0.new) r3 = #3}
+//
+// Note that the conditional assignments are not generated in .new form here.
+// We assume optimistically that they will be formed later.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xfer"
+#include "Hexagon.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+  void initializeHexagonSplitTFRCondSetsPass(PassRegistry&);
+}
+
+
+namespace {
+
+class HexagonSplitTFRCondSets : public MachineFunctionPass {
+    const HexagonTargetMachine &QTM;
+    const HexagonSubtarget &QST;
+
+ public:
+    static char ID;
+    HexagonSplitTFRCondSets(const HexagonTargetMachine& TM) :
+      MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+      initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry());
+    }
+
+    const char *getPassName() const {
+      return "Hexagon Split TFRCondSets";
+    }
+    bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonSplitTFRCondSets::ID = 0;
+
+
+bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
+
+  const TargetInstrInfo *TII = QTM.getInstrInfo();
+
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+       MBBb != MBBe; ++MBBb) {
+    MachineBasicBlock* MBB = MBBb;
+    // Traverse the basic block.
+    for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+         ++MII) {
+      MachineInstr *MI = MII;
+      int Opc1, Opc2;
+      switch (MI->getOpcode()) {
+        case Hexagon::TFR_condset_rr:
+        case Hexagon::TFR_condset_rr_f:
+        case Hexagon::TFR_condset_rr64_f: {
+          int DestReg = MI->getOperand(0).getReg();
+          int SrcReg1 = MI->getOperand(2).getReg();
+          int SrcReg2 = MI->getOperand(3).getReg();
+
+          if (MI->getOpcode() == Hexagon::TFR_condset_rr ||
+              MI->getOpcode() == Hexagon::TFR_condset_rr_f) {
+            Opc1 = Hexagon::TFR_cPt;
+            Opc2 = Hexagon::TFR_cNotPt;
+          }
+          else if (MI->getOpcode() == Hexagon::TFR_condset_rr64_f) {
+            Opc1 = Hexagon::TFR64_cPt;
+            Opc2 = Hexagon::TFR64_cNotPt;
+          }
+
+          // Minor optimization: do not emit the predicated copy if the source
+          // and the destination are the same register.
+          if (DestReg != SrcReg1) {
+            BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc1),
+                    DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
+          }
+          if (DestReg != SrcReg2) {
+            BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc2),
+                    DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
+          }
+          MII = MBB->erase(MI);
+          --MII;
+          break;
+        }
+        case Hexagon::TFR_condset_ri:
+        case Hexagon::TFR_condset_ri_f: {
+          int DestReg = MI->getOperand(0).getReg();
+          int SrcReg1 = MI->getOperand(2).getReg();
+
+          // Do not emit the predicated copy if the source and the destination
+          // are the same register.
+          if (DestReg != SrcReg1) {
+            BuildMI(*MBB, MII, MI->getDebugLoc(),
+                    TII->get(Hexagon::TFR_cPt), DestReg).
+ addReg(MI->getOperand(1).getReg()).addReg(SrcReg1); + } + if (MI->getOpcode() == Hexagon::TFR_condset_ri ) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cNotPt), DestReg). + addReg(MI->getOperand(1).getReg()). + addImm(MI->getOperand(3).getImm()); + } else if (MI->getOpcode() == Hexagon::TFR_condset_ri_f ) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cNotPt_f), DestReg). + addReg(MI->getOperand(1).getReg()). + addFPImm(MI->getOperand(3).getFPImm()); + } + + MII = MBB->erase(MI); + --MII; + break; + } + case Hexagon::TFR_condset_ir: + case Hexagon::TFR_condset_ir_f: { + int DestReg = MI->getOperand(0).getReg(); + int SrcReg2 = MI->getOperand(3).getReg(); + + if (MI->getOpcode() == Hexagon::TFR_condset_ir ) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cPt), DestReg). + addReg(MI->getOperand(1).getReg()). + addImm(MI->getOperand(2).getImm()); + } else if (MI->getOpcode() == Hexagon::TFR_condset_ir_f ) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cPt_f), DestReg). + addReg(MI->getOperand(1).getReg()). + addFPImm(MI->getOperand(2).getFPImm()); + } + + // Do not emit the predicated copy if the source and + // the destination is the same register. + if (DestReg != SrcReg2) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFR_cNotPt), DestReg). + addReg(MI->getOperand(1).getReg()).addReg(SrcReg2); + } + MII = MBB->erase(MI); + --MII; + break; + } + case Hexagon::TFR_condset_ii: + case Hexagon::TFR_condset_ii_f: { + int DestReg = MI->getOperand(0).getReg(); + int SrcReg1 = MI->getOperand(1).getReg(); + + if (MI->getOpcode() == Hexagon::TFR_condset_ii ) { + int Immed1 = MI->getOperand(2).getImm(); + int Immed2 = MI->getOperand(3).getImm(); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cPt), + DestReg).addReg(SrcReg1).addImm(Immed1); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cNotPt), + DestReg).addReg(SrcReg1).addImm(Immed2); + } else if (MI->getOpcode() == Hexagon::TFR_condset_ii_f ) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cPt_f), DestReg). + addReg(SrcReg1). + addFPImm(MI->getOperand(2).getFPImm()); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cNotPt_f), DestReg). + addReg(SrcReg1). 
+ addFPImm(MI->getOperand(3).getFPImm()); + } + MII = MBB->erase(MI); + --MII; + break; + } + } + } + } + return true; +} + +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon Split TFRCondSets"; + PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr", + &HexagonSplitTFRCondSets::ID, 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass* +llvm::createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM) { + return new HexagonSplitTFRCondSets(TM); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp new file mode 100644 index 000000000000..fca67073ef38 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -0,0 +1,90 @@ +//===-- HexagonSubtarget.cpp - Hexagon Subtarget Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Hexagon specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "HexagonSubtarget.h" +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +#define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC +#include "HexagonGenSubtargetInfo.inc" + +static cl::opt<bool> +EnableV3("enable-hexagon-v3", cl::Hidden, + cl::desc("Enable Hexagon V3 instructions.")); + +static cl::opt<bool> +EnableMemOps( + "enable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true), + cl::desc( + "Generate V4 MEMOP in code generation for Hexagon target")); + +static cl::opt<bool> +DisableMemOps( + "disable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false), + cl::desc( + "Do not generate V4 MEMOP in code generation for Hexagon target")); + +static cl::opt<bool> +EnableIEEERndNear( + "enable-hexagon-ieee-rnd-near", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Generate non-chopped conversion from fp to int.")); + +HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): + HexagonGenSubtargetInfo(TT, CPU, FS), + CPUString(CPU.str()) { + + // If the programmer has not specified a Hexagon version, default to -mv4. + if (CPUString.empty()) + CPUString = "hexagonv4"; + + if (CPUString == "hexagonv2") { + HexagonArchVersion = V2; + } else if (CPUString == "hexagonv3") { + EnableV3 = true; + HexagonArchVersion = V3; + } else if (CPUString == "hexagonv4") { + HexagonArchVersion = V4; + } else if (CPUString == "hexagonv5") { + HexagonArchVersion = V5; + } else { + llvm_unreachable("Unrecognized Hexagon processor version"); + } + + ParseSubtargetFeatures(CPUString, FS); + + // Initialize scheduling itinerary for the specified CPU. 
+  InstrItins = getInstrItineraryForCPU(CPUString);
+
+  // UseMemOps is on by default (EnableMemOps defaults to true) unless it is
+  // disabled explicitly.
+  if (DisableMemOps)
+    UseMemOps = false;
+  else if (EnableMemOps)
+    UseMemOps = true;
+  else
+    UseMemOps = false;
+
+  if (EnableIEEERndNear)
+    ModeIEEERndNear = true;
+  else
+    ModeIEEERndNear = false;
+}
+
+// Pin the vtable to this file.
+void HexagonSubtarget::anchor() {}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
new file mode 100644
index 000000000000..690bef0d7296
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -0,0 +1,80 @@
+//===-- HexagonSubtarget.h - Define Subtarget for the Hexagon ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_SUBTARGET_H
+#define Hexagon_SUBTARGET_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "HexagonGenSubtargetInfo.inc"
+
+#define Hexagon_SMALL_DATA_THRESHOLD 8
+#define Hexagon_SLOTS 4
+
+namespace llvm {
+
+class HexagonSubtarget : public HexagonGenSubtargetInfo {
+  virtual void anchor();
+  bool UseMemOps;
+  bool ModeIEEERndNear;
+
+public:
+  enum HexagonArchEnum {
+    V1, V2, V3, V4, V5
+  };
+
+  HexagonArchEnum HexagonArchVersion;
+  std::string CPUString;
+  InstrItineraryData InstrItins;
+
+public:
+  HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+  /// getInstrItineraryData - Return the instruction itineraries based on
+  /// subtarget selection.
+  const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+  /// ParseSubtargetFeatures - Parses features string setting specified
+  /// subtarget options. Definition of function is auto generated by tblgen.
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+  bool hasV2TOps () const { return HexagonArchVersion >= V2; }
+  bool hasV2TOpsOnly () const { return HexagonArchVersion == V2; }
+  bool hasV3TOps () const { return HexagonArchVersion >= V3; }
+  bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; }
+  bool hasV4TOps () const { return HexagonArchVersion >= V4; }
+  bool hasV4TOpsOnly () const { return HexagonArchVersion == V4; }
+  bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; }
+  bool hasV5TOps () const { return HexagonArchVersion >= V5; }
+  bool hasV5TOpsOnly () const { return HexagonArchVersion == V5; }
+  bool modeIEEERndNear () const { return ModeIEEERndNear; }
+
+  bool isSubtargetV2() const { return HexagonArchVersion == V2; }
+  const std::string &getCPUString () const { return CPUString; }
+
+  // Threshold for the small data section.
+  unsigned getSmallDataThreshold() const {
+    return Hexagon_SMALL_DATA_THRESHOLD;
+  }
+  const HexagonArchEnum &getHexagonArchVersion() const {
+    return HexagonArchVersion;
+  }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
new file mode 100644
index 000000000000..bb950a0ea75a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -0,0 +1,204 @@
+//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the Hexagon specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "HexagonMachineScheduler.h"
+#include "HexagonTargetObjectFile.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
+  cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
+
+static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
+  cl::Hidden, cl::ZeroOrMore, cl::init(false),
+  cl::desc("Disable Hexagon MI Scheduling"));
+
+static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
+  cl::Hidden, cl::ZeroOrMore, cl::init(false),
+  cl::desc("Disable Hexagon CFG Optimization"));
+
+
+/// HexagonTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int HexagonTargetMachineModule;
+int HexagonTargetMachineModule = 0;
+
+extern "C" void LLVMInitializeHexagonTarget() {
+  // Register the target.
+ RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget); +} + +static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { + return new VLIWMachineScheduler(C, new ConvergingVLIWScheduler()); +} + +static MachineSchedRegistry +SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", + createVLIWMachineSched); + +/// HexagonTargetMachine ctor - Create an ILP32 architecture model. +/// + +/// Hexagon_TODO: Do I need an aggregate alignment? +/// +HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + DL("e-p:32:32:32-" + "i64:64:64-i32:32:32-i16:16:16-i1:32:32-" + "f64:64:64-f32:32:32-a0:0-n32") , + Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this), + TSInfo(*this), + FrameLowering(Subtarget), + InstrItins(&Subtarget.getInstrItineraryData()) { + setMCUseCFI(false); + initAsmInfo(); +} + +// addPassesForOptimizations - Allow the backend (target) to add Target +// Independent Optimization passes to the Pass Manager. +bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) { + if (getOptLevel() != CodeGenOpt::None) { + PM.add(createConstantPropagationPass()); + PM.add(createLoopSimplifyPass()); + PM.add(createDeadCodeEliminationPass()); + PM.add(createConstantPropagationPass()); + PM.add(createLoopUnrollPass()); + PM.add(createLoopStrengthReducePass()); + } + return true; +} + +namespace { +/// Hexagon Code Generator Pass Configuration Options. +class HexagonPassConfig : public TargetPassConfig { +public: + HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) { + // FIXME: Rather than calling enablePass(&MachineSchedulerID) below, define + // HexagonSubtarget::enableMachineScheduler() { return true; }. + // That will bypass the SelectionDAG VLIW scheduler, which is probably just + // hurting compile time and will be removed eventually anyway. 
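+    // (A rough sketch of that suggested override, not part of this patch:
+    //    bool HexagonSubtarget::enableMachineScheduler() const { return true; }
+    //  TargetSubtargetInfo provides such a hook in later LLVM versions.)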
+ if (DisableHexagonMISched) + disablePass(&MachineSchedulerID); + else + enablePass(&MachineSchedulerID); + } + + HexagonTargetMachine &getHexagonTargetMachine() const { + return getTM<HexagonTargetMachine>(); + } + + virtual ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const { + return createVLIWMachineSched(C); + } + + virtual bool addInstSelector(); + virtual bool addPreRegAlloc(); + virtual bool addPostRegAlloc(); + virtual bool addPreSched2(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { + return new HexagonPassConfig(this, PM); +} + +bool HexagonPassConfig::addInstSelector() { + HexagonTargetMachine &TM = getHexagonTargetMachine(); + bool NoOpt = (getOptLevel() == CodeGenOpt::None); + + if (!NoOpt) + addPass(createHexagonRemoveExtendArgs(TM)); + + addPass(createHexagonISelDag(TM, getOptLevel())); + + if (!NoOpt) { + addPass(createHexagonPeephole()); + printAndVerify("After hexagon peephole pass"); + } + + return false; +} + +bool HexagonPassConfig::addPreRegAlloc() { + if (getOptLevel() != CodeGenOpt::None) + if (!DisableHardwareLoops) + addPass(createHexagonHardwareLoops()); + return false; +} + +bool HexagonPassConfig::addPostRegAlloc() { + const HexagonTargetMachine &TM = getHexagonTargetMachine(); + if (getOptLevel() != CodeGenOpt::None) + if (!DisableHexagonCFGOpt) + addPass(createHexagonCFGOptimizer(TM)); + return false; +} + +bool HexagonPassConfig::addPreSched2() { + const HexagonTargetMachine &TM = getHexagonTargetMachine(); + const HexagonTargetObjectFile &TLOF = + (const HexagonTargetObjectFile &)getTargetLowering()->getObjFileLowering(); + + addPass(createHexagonCopyToCombine()); + if (getOptLevel() != CodeGenOpt::None) + addPass(&IfConverterID); + if (!TLOF.IsSmallDataEnabled()) { + addPass(createHexagonSplitConst32AndConst64(TM)); + printAndVerify("After hexagon split const32/64 pass"); + } + return true; +} + +bool HexagonPassConfig::addPreEmitPass() { + const HexagonTargetMachine &TM = getHexagonTargetMachine(); + bool NoOpt = (getOptLevel() == CodeGenOpt::None); + + if (!NoOpt) + addPass(createHexagonNewValueJump()); + + // Expand Spill code for predicate registers. + addPass(createHexagonExpandPredSpillCode(TM)); + + // Split up TFRcondsets into conditional transfers. + addPass(createHexagonSplitTFRCondSets(TM)); + + // Create Packets. + if (!NoOpt) { + if (!DisableHardwareLoops) + addPass(createHexagonFixupHwLoops()); + addPass(createHexagonPacketizer()); + } + + return false; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h new file mode 100644 index 000000000000..cf8f9aa3612f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -0,0 +1,83 @@ +//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Hexagon specific subclass of TargetMachine. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HexagonTARGETMACHINE_H +#define HexagonTARGETMACHINE_H + +#include "HexagonFrameLowering.h" +#include "HexagonISelLowering.h" +#include "HexagonInstrInfo.h" +#include "HexagonSelectionDAGInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class Module; + +class HexagonTargetMachine : public LLVMTargetMachine { + const DataLayout DL; // Calculates type size & alignment. + HexagonSubtarget Subtarget; + HexagonInstrInfo InstrInfo; + HexagonTargetLowering TLInfo; + HexagonSelectionDAGInfo TSInfo; + HexagonFrameLowering FrameLowering; + const InstrItineraryData* InstrItins; + +public: + HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + + virtual const HexagonInstrInfo *getInstrInfo() const { + return &InstrInfo; + } + virtual const HexagonSubtarget *getSubtargetImpl() const { + return &Subtarget; + } + virtual const HexagonRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + + virtual const InstrItineraryData* getInstrItineraryData() const { + return InstrItins; + } + + + virtual const HexagonTargetLowering* getTargetLowering() const { + return &TLInfo; + } + + virtual const HexagonFrameLowering* getFrameLowering() const { + return &FrameLowering; + } + + virtual const HexagonSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + + virtual const DataLayout *getDataLayout() const { return &DL; } + static unsigned getModuleMatchQuality(const Module &M); + + // Pass Pipeline Configuration. + virtual bool addPassesForOptimizations(PassManagerBase &PM); + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); +}; + +extern bool flag_aligned_memcpy; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp new file mode 100644 index 000000000000..7773cff2d21a --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -0,0 +1,100 @@ +//===-- HexagonTargetObjectFile.cpp - Hexagon asm properties --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the HexagonTargetAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetObjectFile.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; + +static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold", + cl::init(8), cl::Hidden, + cl::desc("The maximum size of an object in the sdata section")); + +void HexagonTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + + + SmallDataSection = + getContext().getELFSection(".sdata", ELF::SHT_PROGBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getDataRel()); + SmallBSSSection = + getContext().getELFSection(".sbss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getBSS()); +} + +// sdata/sbss support taken largely from the MIPS Backend. +static bool IsInSmallSection(uint64_t Size) { + return Size > 0 && Size <= (uint64_t)SmallDataThreshold; +} + +bool HexagonTargetObjectFile::IsSmallDataEnabled () const { + return SmallDataThreshold > 0; +} + +/// IsGlobalInSmallSection - Return true if this global value should be +/// placed into small data/bss section. +bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const { + // If the primary definition of this global value is outside the current + // translation unit or the global value is available for inspection but not + // emission, then do nothing. + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) + return false; + + // Otherwise, Check if GV should be in sdata/sbss, when normally it would end + // up in getKindForGlobal(GV, TM). + return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM)); +} + +/// IsGlobalInSmallSection - Return true if this global value should be +/// placed into small data/bss section. +bool HexagonTargetObjectFile:: +IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, + SectionKind Kind) const { + // Only global variables, not functions. + const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV); + if (!GVA) + return false; + + if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) { + Type *Ty = GV->getType()->getElementType(); + return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty)); + } + + return false; +} + +const MCSection *HexagonTargetObjectFile:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + // Handle Small Section classification here. + if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallBSSSection; + if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallDataSection; + + // Otherwise, we work the same as ELF. 
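+  // For example, with the default -hexagon-small-data-threshold=8, a 4-byte
+  // "int g = 1;" is placed in .sdata (.sbss if zero-initialized), while a
+  // 32-byte array exceeds the threshold and falls through to the regular
+  // ELF section selection below.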
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h new file mode 100644 index 000000000000..41f6792ca8e5 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h @@ -0,0 +1,41 @@ +//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef HexagonTARGETOBJECTFILE_H +#define HexagonTARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCSectionELF.h" + +namespace llvm { + + class HexagonTargetObjectFile : public TargetLoweringObjectFileELF { + const MCSectionELF *SmallDataSection; + const MCSectionELF *SmallBSSSection; + public: + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + + /// IsGlobalInSmallSection - Return true if this global address should be + /// placed into small data/bss section. + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM, + SectionKind Kind) const; + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const; + + bool IsSmallDataEnabled () const; + const MCSection* SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, + Mangler *Mang, + const TargetMachine &TM) const; + }; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp new file mode 100644 index 000000000000..41e382dc072a --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -0,0 +1,1427 @@ +//===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a simple VLIW packetizer using DFA. The packetizer works on +// machine basic blocks. For each instruction I in BB, the packetizer consults +// the DFA to see if machine resources are available to execute I. If so, the +// packetizer checks if I depends on any instruction J in the current packet. +// If no dependency is found, I is added to current packet and machine resource +// is marked as taken. If any dependency is found, a target API call is made to +// prune the dependence. 
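+// Roughly, the top-level loop looks like this (a sketch of the flow described
+// above, not the exact implementation):
+//
+//   for each instruction I in the region:
+//     if the DFA has no resources left for I:        start a new packet
+//     else if I depends on some J in the packet and
+//             the dependence cannot be pruned:       start a new packet
+//     else:                                          add I, reserve resources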
+// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "packets" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" + +#include <map> +#include <vector> + +using namespace llvm; + +static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles", + cl::ZeroOrMore, cl::Hidden, cl::init(true), + cl::desc("Allow non-solo packetization of volatile memory references")); + +namespace llvm { + void initializeHexagonPacketizerPass(PassRegistry&); +} + + +namespace { + class HexagonPacketizer : public MachineFunctionPass { + + public: + static char ID; + HexagonPacketizer() : MachineFunctionPass(ID) { + initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const { + return "Hexagon Packetizer"; + } + + bool runOnMachineFunction(MachineFunction &Fn); + }; + char HexagonPacketizer::ID = 0; + + class HexagonPacketizerList : public VLIWPacketizerList { + + private: + + // Has the instruction been promoted to a dot-new instruction. + bool PromotedToDotNew; + + // Has the instruction been glued to allocframe. + bool GlueAllocframeStore; + + // Has the feeder instruction been glued to new value jump. + bool GlueToNewValueJump; + + // Check if there is a dependence between some instruction already in this + // packet and this instruction. + bool Dependence; + + // Only check for dependence if there are resources available to + // schedule this instruction. + bool FoundSequentialDependence; + + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + // Track MIs with ignored dependece. + std::vector<MachineInstr*> IgnoreDepMIs; + + public: + // Ctor. + HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, + MachineDominatorTree &MDT, + const MachineBranchProbabilityInfo *MBPI); + + // initPacketizerState - initialize some internal flags. + void initPacketizerState(); + + // ignorePseudoInstruction - Ignore bundling of pseudo instructions. 
+ bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB); + + // isSoloInstruction - return true if instruction MI can not be packetized + // with any other instruction, which means that MI itself is a packet. + bool isSoloInstruction(MachineInstr *MI); + + // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ + // together. + bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ); + + // isLegalToPruneDependencies - Is it legal to prune dependece between SUI + // and SUJ. + bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ); + + MachineBasicBlock::iterator addToPacket(MachineInstr *MI); + private: + bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg); + bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU, + unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU, + unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit, + MachineBasicBlock::iterator &MII); + bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI, + unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit); + bool DemoteToDotOld(MachineInstr* MI); + bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2, + std::map <MachineInstr*, SUnit*> MIToSUnit); + bool RestrictingDepExistInPacket(MachineInstr*, + unsigned, std::map <MachineInstr*, SUnit*>); + bool isNewifiable(MachineInstr* MI); + bool isCondInst(MachineInstr* MI); + bool tryAllocateResourcesForConstExt(MachineInstr* MI); + bool canReserveResourcesForConstExt(MachineInstr *MI); + void reserveResourcesForConstExt(MachineInstr* MI); + bool isNewValueInst(MachineInstr* MI); + }; +} + +INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", + false, false) + + +// HexagonPacketizerList Ctor. +HexagonPacketizerList::HexagonPacketizerList( + MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT, + const MachineBranchProbabilityInfo *MBPI) + : VLIWPacketizerList(MF, MLI, MDT, true){ + this->MBPI = MBPI; +} + +bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { + const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + const MachineBranchProbabilityInfo *MBPI = + &getAnalysis<MachineBranchProbabilityInfo>(); + // Instantiate the packetizer. + HexagonPacketizerList Packetizer(Fn, MLI, MDT, MBPI); + + // DFA state table should not be empty. + assert(Packetizer.getResourceTracker() && "Empty DFA table!"); + + // + // Loop over all basic blocks and remove KILL pseudo-instructions + // These instructions confuse the dependence analysis. Consider: + // D0 = ... (Insn 0) + // R0 = KILL R0, D0 (Insn 1) + // R0 = ... (Insn 2) + // Here, Insn 1 will result in the dependence graph not emitting an output + // dependence between Insn 0 and Insn 2. 
This can lead to incorrect
+  // packetization.
+  //
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB) {
+    MachineBasicBlock::iterator End = MBB->end();
+    MachineBasicBlock::iterator MI = MBB->begin();
+    while (MI != End) {
+      if (MI->isKill()) {
+        MachineBasicBlock::iterator DeleteMI = MI;
+        ++MI;
+        MBB->erase(DeleteMI);
+        End = MBB->end();
+        continue;
+      }
+      ++MI;
+    }
+  }
+
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB) {
+    // Find scheduling regions and schedule / packetize each region.
+    unsigned RemainingCount = MBB->size();
+    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+        RegionEnd != MBB->begin();) {
+      // The next region starts above the previous region. Look backward in the
+      // instruction stream until we find the nearest boundary.
+      MachineBasicBlock::iterator I = RegionEnd;
+      for(;I != MBB->begin(); --I, --RemainingCount) {
+        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn))
+          break;
+      }
+      I = MBB->begin();
+
+      // Skip empty scheduling regions.
+      if (I == RegionEnd) {
+        RegionEnd = llvm::prior(RegionEnd);
+        --RemainingCount;
+        continue;
+      }
+      // Skip regions with one instruction.
+      if (I == llvm::prior(RegionEnd)) {
+        RegionEnd = llvm::prior(RegionEnd);
+        continue;
+      }
+
+      Packetizer.PacketizeMIs(MBB, I, RegionEnd);
+      RegionEnd = I;
+    }
+  }
+
+  return true;
+}
+
+
+static bool IsIndirectCall(MachineInstr* MI) {
+  return ((MI->getOpcode() == Hexagon::CALLR) ||
+          (MI->getOpcode() == Hexagon::CALLRv3));
+}
+
+// Reserve resources for a constant extender. Trigger an assertion if the
+// reservation fails.
+void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  MachineFunction *MF = MI->getParent()->getParent();
+  MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+                                                  MI->getDebugLoc());
+
+  if (ResourceTracker->canReserveResources(PseudoMI)) {
+    ResourceTracker->reserveResources(PseudoMI);
+    MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+  } else {
+    MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+    llvm_unreachable("can not reserve resources for constant extender.");
+  }
+  return;
+}
+
+bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  assert((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
+         "Should only be called for constant extended instructions");
+  MachineFunction *MF = MI->getParent()->getParent();
+  MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+                                                  MI->getDebugLoc());
+  bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
+  MF->DeleteMachineInstr(PseudoMI);
+  return CanReserve;
+}
+
+// Allocate resources (i.e. 4 bytes) for a constant extender. If successful,
+// return true; otherwise, return false.
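+// Roughly speaking, an extended instruction such as
+//   r0 = add(r1, ##0x12345678)
+// carries an extender word and so occupies two of the four words in a packet;
+// the IMMEXT_i pseudo below stands in for that extra word when querying the
+// DFA.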
+bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) { + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + MachineFunction *MF = MI->getParent()->getParent(); + MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i), + MI->getDebugLoc()); + + if (ResourceTracker->canReserveResources(PseudoMI)) { + ResourceTracker->reserveResources(PseudoMI); + MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); + return true; + } else { + MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); + return false; + } +} + + +bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI, + SDep::Kind DepType, + unsigned DepReg) { + + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + const HexagonRegisterInfo* QRI = + (const HexagonRegisterInfo *) TM.getRegisterInfo(); + + // Check for lr dependence + if (DepReg == QRI->getRARegister()) { + return true; + } + + if (QII->isDeallocRet(MI)) { + if (DepReg == QRI->getFrameRegister() || + DepReg == QRI->getStackRegister()) + return true; + } + + // Check if this is a predicate dependence + const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg); + if (RC == &Hexagon::PredRegsRegClass) { + return true; + } + + // + // Lastly check for an operand used in an indirect call + // If we had an attribute for checking if an instruction is an indirect call, + // then we could have avoided this relatively brittle implementation of + // IsIndirectCall() + // + // Assumes that the first operand of the CALLr is the function address + // + if (IsIndirectCall(MI) && (DepType == SDep::Data)) { + MachineOperand MO = MI->getOperand(0); + if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) { + return true; + } + } + + return false; +} + +static bool IsRegDependence(const SDep::Kind DepType) { + return (DepType == SDep::Data || DepType == SDep::Anti || + DepType == SDep::Output); +} + +static bool IsDirectJump(MachineInstr* MI) { + return (MI->getOpcode() == Hexagon::JMP); +} + +static bool IsSchedBarrier(MachineInstr* MI) { + switch (MI->getOpcode()) { + case Hexagon::BARRIER: + return true; + } + return false; +} + +static bool IsControlFlow(MachineInstr* MI) { + return (MI->getDesc().isTerminator() || MI->getDesc().isCall()); +} + +static bool IsLoopN(MachineInstr *MI) { + return (MI->getOpcode() == Hexagon::LOOP0_i || + MI->getOpcode() == Hexagon::LOOP0_r); +} + +/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a +/// callee-saved register. +static bool DoesModifyCalleeSavedReg(MachineInstr *MI, + const TargetRegisterInfo *TRI) { + for (const uint16_t *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) { + unsigned CalleeSavedReg = *CSR; + if (MI->modifiesRegister(CalleeSavedReg, TRI)) + return true; + } + return false; +} + +// Returns true if an instruction can be promoted to .new predicate +// or new-value store. +bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) { + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + if ( isCondInst(MI) || QII->mayBeNewStore(MI)) + return true; + else + return false; +} + +bool HexagonPacketizerList::isCondInst (MachineInstr* MI) { + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + const MCInstrDesc& TID = MI->getDesc(); + // bug 5670: until that is fixed, + // this portion is disabled. 
+  if ( TID.isConditionalBranch() // && !IsRegisterJump(MI)) ||
+    || QII->isConditionalTransfer(MI)
+    || QII->isConditionalALU32(MI)
+    || QII->isConditionalLoad(MI)
+    || QII->isConditionalStore(MI)) {
+    return true;
+  }
+  return false;
+}
+
+
+// Promote an instruction to its .new form.
+// At this time, we have already made a call to CanPromoteToDotNew
+// and made sure that it can *indeed* be promoted.
+bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
+                        SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+                        const TargetRegisterClass* RC) {
+
+  assert (DepType == SDep::Data);
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+  int NewOpcode;
+  if (RC == &Hexagon::PredRegsRegClass)
+    NewOpcode = QII->GetDotNewPredOp(MI, MBPI);
+  else
+    NewOpcode = QII->GetDotNewOp(MI);
+  MI->setDesc(QII->get(NewOpcode));
+
+  return true;
+}
+
+bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) {
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  int NewOpcode = QII->GetDotOldOp(MI->getOpcode());
+  MI->setDesc(QII->get(NewOpcode));
+  return true;
+}
+
+enum PredicateKind {
+  PK_False,
+  PK_True,
+  PK_Unknown
+};
+
+/// Returns PK_True if the instruction is predicated on p0, PK_False if it is
+/// predicated on !p0, and PK_Unknown if it is not predicated.
+static PredicateKind getPredicateSense(MachineInstr* MI,
+                                       const HexagonInstrInfo *QII) {
+  if (!QII->isPredicated(MI))
+    return PK_Unknown;
+
+  if (QII->isPredicatedTrue(MI))
+    return PK_True;
+
+  return PK_False;
+}
+
+static MachineOperand& GetPostIncrementOperand(MachineInstr *MI,
+                                               const HexagonInstrInfo *QII) {
+  assert(QII->isPostIncrement(MI) && "Not a post increment operation.");
+#ifndef NDEBUG
+  // Post increment means duplicates. Use a dense map to find duplicates in
+  // the list. Caution: DenseMap initializes with a minimum of 64 buckets,
+  // whereas there are at most 5 operands in the post increment.
+  DenseMap<unsigned, unsigned> DefRegsSet;
+  for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
+    if (MI->getOperand(opNum).isReg() &&
+        MI->getOperand(opNum).isDef()) {
+      DefRegsSet[MI->getOperand(opNum).getReg()] = 1;
+    }
+
+  for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
+    if (MI->getOperand(opNum).isReg() &&
+        MI->getOperand(opNum).isUse()) {
+      if (DefRegsSet[MI->getOperand(opNum).getReg()]) {
+        return MI->getOperand(opNum);
+      }
+    }
+#else
+  if (MI->getDesc().mayLoad()) {
+    // The 2nd operand is always the post increment operand in a load.
+    assert(MI->getOperand(1).isReg() &&
+           "Post increment operand has to be a register.");
+    return (MI->getOperand(1));
+  }
+  if (MI->getDesc().mayStore()) {
+    // The 1st operand is always the post increment operand in a store.
+    assert(MI->getOperand(0).isReg() &&
+           "Post increment operand has to be a register.");
+    return (MI->getOperand(0));
+  }
+#endif
+  // We should never come here.
+  llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
+}
+
+// Get the value being stored.
+static MachineOperand& GetStoreValueOperand(MachineInstr *MI) {
+  // The value being stored is always the last operand.
+  return (MI->getOperand(MI->getNumOperands()-1));
+}
+
+// Can this be a new-value store?
+// The following restrictions must be respected when converting a store into
+// a new-value store.
+// 1. If an instruction uses auto-increment, its address register cannot
+//    be a new-value register. Arch Spec 5.4.2.1
+// 2. If an instruction uses absolute-set addressing mode,
+//    its address register cannot be a new-value register.
+//    Arch Spec 5.4.2.1. TODO: This is not enabled, as absolute-set
+//    addressing mode patterns are not implemented.
+// 3. If an instruction produces a 64-bit result, its registers cannot be used
+//    as new-value registers. Arch Spec 5.4.2.2.
+// 4. If the instruction that sets a new-value register is conditional, then
+//    the instruction that uses the new-value register must also be
+//    conditional, and both must always have their predicates evaluate
+//    identically. Arch Spec 5.4.2.3.
+// 5. There is an implied restriction that a packet cannot have another store
+//    if there is a new-value store in the packet. Corollary: if there is
+//    already a store in a packet, there cannot be a new-value store.
+//    Arch Spec: 3.4.4.2
+bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
+                MachineInstr *PacketMI, unsigned DepReg,
+                std::map <MachineInstr*, SUnit*> MIToSUnit) {
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  // Make sure we are looking at a store that can be promoted.
+  if (!QII->mayBeNewStore(MI))
+    return false;
+
+  // Make sure there is a dependency and that it can be new-value'ed.
+  if (GetStoreValueOperand(MI).isReg() &&
+      GetStoreValueOperand(MI).getReg() != DepReg)
+    return false;
+
+  const HexagonRegisterInfo* QRI =
+                            (const HexagonRegisterInfo *) TM.getRegisterInfo();
+  const MCInstrDesc& MCID = PacketMI->getDesc();
+  // The first operand is always the result.
+
+  const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI, MF);
+
+  // If there is already a store in the packet, a new-value store cannot be
+  // added. Arch Spec 3.4.4.2.
+  for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
+         VE = CurrentPacketMIs.end();
+       (VI != VE); ++VI) {
+    SUnit* PacketSU = MIToSUnit[*VI];
+    if (PacketSU->getInstr()->getDesc().mayStore() ||
+        // if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME,
+        // then we don't need this
+        PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
+        PacketSU->getInstr()->getOpcode() == Hexagon::DEALLOCFRAME)
+      return false;
+  }
+
+  if (PacketRC == &Hexagon::DoubleRegsRegClass) {
+    // New-value store constraint: double regs cannot feed into a new-value
+    // store. Arch spec section: 5.4.2.2.
+    return false;
+  }
+
+  // Make sure it's NOT the post increment register that we are going to
+  // new value.
+  if (QII->isPostIncrement(MI) &&
+      MI->getDesc().mayStore() &&
+      GetPostIncrementOperand(MI, QII).getReg() == DepReg) {
+    return false;
+  }
+
+  if (QII->isPostIncrement(PacketMI) &&
+      PacketMI->getDesc().mayLoad() &&
+      GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) {
+    // If the source is post_inc, or uses absolute-set addressing,
+    // it cannot feed into a new-value store:
+    //   r3 = memw(r2++#4)
+    //   memw(r30 + #-1404) = r2.new  -> cannot be a new-value store
+    // Arch spec section: 5.4.2.1.
+    return false;
+  }
+
+  // If the source that feeds the store is predicated, the new-value store
+  // must also be predicated.
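+  // For instance (a sketch):
+  //   if (p0) r2 = add(r3, #4)        // predicated producer
+  //   if (p0) memw(r4+#0) = r2.new    // store must match the predicate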
+  if (QII->isPredicated(PacketMI)) {
+    if (!QII->isPredicated(MI))
+      return false;
+
+    // Check to make sure that they both will have their predicates
+    // evaluate identically.
+    unsigned predRegNumSrc = 0;
+    unsigned predRegNumDst = 0;
+    const TargetRegisterClass* predRegClass = NULL;
+
+    // Get the predicate register used in the source instruction.
+    for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
+      if ( PacketMI->getOperand(opNum).isReg())
+        predRegNumSrc = PacketMI->getOperand(opNum).getReg();
+      predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc);
+      if (predRegClass == &Hexagon::PredRegsRegClass) {
+        break;
+      }
+    }
+    assert ((predRegClass == &Hexagon::PredRegsRegClass ) &&
+        ("predicate register not found in a predicated PacketMI instruction"));
+
+    // Get the predicate register used in the new-value store instruction.
+    for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+      if ( MI->getOperand(opNum).isReg())
+        predRegNumDst = MI->getOperand(opNum).getReg();
+      predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst);
+      if (predRegClass == &Hexagon::PredRegsRegClass) {
+        break;
+      }
+    }
+    assert ((predRegClass == &Hexagon::PredRegsRegClass ) &&
+            ("predicate register not found in a predicated MI instruction"));
+
+    // The new-value register producer and user (store) need to satisfy these
+    // constraints:
+    // 1) Both instructions should be predicated on the same register.
+    // 2) If the producer of the new-value register is .new predicated then
+    //    the store should also be .new predicated, and if the producer is
+    //    not .new predicated then the store should not be .new predicated.
+    // 3) Both the new-value register producer and the user should have the
+    //    same predicate sense, i.e., either both should be negated or both
+    //    should be non-negated.
+
+    if (( predRegNumDst != predRegNumSrc) ||
+          QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI)  ||
+          getPredicateSense(MI, QII) != getPredicateSense(PacketMI, QII)) {
+      return false;
+    }
+  }
+
+  // Make sure that, other than the new-value register, no other store
+  // instruction register has been modified in the same packet. Predicate
+  // registers can be modified, but they should not be modified between the
+  // producer and the store instruction, as that would make them both
+  // conditional on different values.
+  // We already know this to be true for all the instructions before and
+  // including PacketMI. However, we need to perform the check for the
+  // remaining instructions in the packet.
+
+  std::vector<MachineInstr*>::iterator VI;
+  std::vector<MachineInstr*>::iterator VE;
+  unsigned StartCheck = 0;
+
+  for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end();
+      (VI != VE); ++VI) {
+    SUnit* TempSU = MIToSUnit[*VI];
+    MachineInstr* TempMI = TempSU->getInstr();
+
+    // The following condition is true for all the instructions until PacketMI
+    // is reached (StartCheck is set to 0 before the for loop).
+    // The StartCheck flag is 1 for all the instructions after PacketMI.
+    if (TempMI != PacketMI && !StartCheck) // start processing only after
+      continue;                            // encountering PacketMI
+
+    StartCheck = 1;
+    if (TempMI == PacketMI) // We don't want to check PacketMI for dependence
+      continue;
+
+    for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+      if (MI->getOperand(opNum).isReg() &&
+          TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(),
+                                               QRI))
+        return false;
+    }
+  }
+
+  // Make sure that for non-POST_INC stores:
+  // 1. The only use of reg is DepReg and no other registers.
+  //    This handles V4 base+index registers.
+  //    The following store cannot be dot-new.
+  //    Eg. r0 = add(r0, #3)
+  //        memw(r1+r0<<#2) = r0
+  if (!QII->isPostIncrement(MI) &&
+      GetStoreValueOperand(MI).isReg() &&
+      GetStoreValueOperand(MI).getReg() == DepReg) {
+    for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
+      if (MI->getOperand(opNum).isReg() &&
+          MI->getOperand(opNum).getReg() == DepReg) {
+        return false;
+      }
+    }
+    // 2. If the data definition comes from an implicit definition of the
+    //    register, do not newify the store. Eg.
+    //    %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
+    //    STrih_indexed %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
+    for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
+      if (PacketMI->getOperand(opNum).isReg() &&
+          PacketMI->getOperand(opNum).getReg() == DepReg &&
+          PacketMI->getOperand(opNum).isDef() &&
+          PacketMI->getOperand(opNum).isImplicit()) {
+        return false;
+      }
+    }
+  }
+
+  // Can be a dot-new store.
+  return true;
+}
+
+// Can this MI be promoted to either a new-value store or a new-value jump?
+bool HexagonPacketizerList::CanPromoteToNewValue( MachineInstr *MI,
+                SUnit *PacketSU, unsigned DepReg,
+                std::map <MachineInstr*, SUnit*> MIToSUnit,
+                MachineBasicBlock::iterator &MII)
+{
+
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  const HexagonRegisterInfo* QRI =
+                            (const HexagonRegisterInfo *) TM.getRegisterInfo();
+  if (!QRI->Subtarget.hasV4TOps() ||
+      !QII->mayBeNewStore(MI))
+    return false;
+
+  MachineInstr *PacketMI = PacketSU->getInstr();
+
+  // Check to see if the store can be new-value'ed.
+  if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit))
+    return true;
+
+  // Check to see if the compare/jump can be new-value'ed.
+  // This is done as a pass on its own. Don't need to check it here.
+  return false;
+}
+
+// Check to see if an instruction can be dot-new. There are three kinds:
+// 1. dot new on predicate - V2/V3/V4
+// 2. dot new on stores NV/ST - V4
+// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
+bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
+                              SUnit *PacketSU, unsigned DepReg,
+                              std::map <MachineInstr*, SUnit*> MIToSUnit,
+                              MachineBasicBlock::iterator &MII,
+                              const TargetRegisterClass* RC )
+{
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  // Already a dot new instruction.
+  if (QII->isDotNewInst(MI) && !QII->mayBeNewStore(MI))
+    return false;
+
+  if (!isNewifiable(MI))
+    return false;
+
+  // predicate .new
+  if (RC == &Hexagon::PredRegsRegClass && isCondInst(MI))
+      return true;
+  else if (RC != &Hexagon::PredRegsRegClass &&
+      !QII->mayBeNewStore(MI)) // MI is not a new-value store
+    return false;
+  else {
+    // Create a dot-new machine instruction to see if resources can be
+    // allocated. If not, bail out now.
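+    // E.g. promoting "memw(r1+#0) = r0" to "memw(r1+#0) = r0.new" (a sketch)
+    // is only attempted if the DFA still has a slot for the .new form.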
+    int NewOpcode = QII->GetDotNewOp(MI);
+    const MCInstrDesc &desc = QII->get(NewOpcode);
+    DebugLoc dl;
+    MachineInstr *NewMI =
+                    MI->getParent()->getParent()->CreateMachineInstr(desc, dl);
+    bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
+    MI->getParent()->getParent()->DeleteMachineInstr(NewMI);
+
+    if (!ResourcesAvailable)
+      return false;
+
+    // New-value store only; new-value jumps are generated in a separate pass.
+    if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Go through the packet instructions and search for an anti dependency
+// between them and DepReg from MI. Consider this case:
+// Trying to add
+// a) %R1<def> = TFRI_cdNotPt %P3, 2
+// to this packet:
+// {
+//   b) %P0<def> = OR_pp %P3<kill>, %P0<kill>
+//   c) %P3<def> = TFR_PdRs %R23
+//   d) %R1<def> = TFRI_cdnPt %P3, 4
+// }
+// The P3 from a) and d) will be complements after
+// a)'s P3 is converted to .new form.
+// The anti dep between c) and b) is irrelevant for this case.
+bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI,
+      unsigned DepReg,
+      std::map <MachineInstr*, SUnit*> MIToSUnit) {
+
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  SUnit* PacketSUDep = MIToSUnit[MI];
+
+  for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
+       VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
+
+    // We only care about dependencies to predicated instructions.
+    if(!QII->isPredicated(*VIN)) continue;
+
+    // Scheduling unit for the current insn in the packet.
+    SUnit* PacketSU = MIToSUnit[*VIN];
+
+    // Look at dependencies between the current members of the packet and
+    // the predicate defining instruction MI. Make sure that the dependency
+    // is on the exact register we care about.
+    if (PacketSU->isSucc(PacketSUDep)) {
+      for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
+        if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) &&
+            (PacketSU->Succs[i].getKind() == SDep::Anti) &&
+            (PacketSU->Succs[i].getReg() == DepReg)) {
+          return true;
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
+
+/// Gets the predicate register of a predicated instruction.
+static unsigned getPredicatedRegister(MachineInstr *MI,
+                                      const HexagonInstrInfo *QII) {
+  /// We use the following rule: The first predicate register that is a use is
+  /// the predicate register of a predicated instruction.
+
+  assert(QII->isPredicated(MI) && "Must be predicated instruction");
+
+  for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+       OE = MI->operands_end(); OI != OE; ++OI) {
+    MachineOperand &Op = *OI;
+    if (Op.isReg() && Op.getReg() && Op.isUse() &&
+        Hexagon::PredRegsRegClass.contains(Op.getReg()))
+      return Op.getReg();
+  }
+
+  llvm_unreachable("Unknown instruction operand layout");
+
+  return 0;
+}
+
+// Given two predicated instructions, this function detects whether
+// the predicates are complements.
+bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
+     MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit) {
+
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+  // If we don't know the predicate sense of the instructions, bail out early;
+  // we need it later.
+  if (getPredicateSense(MI1, QII) == PK_Unknown ||
+      getPredicateSense(MI2, QII) == PK_Unknown)
+    return false;
+
+  // Scheduling unit for the candidate.
+  SUnit* SU = MIToSUnit[MI1];
+
+  // One corner case deals with the following scenario:
+  // Trying to add
+  // a) %R24<def> = TFR_cPt %P0, %R25
+  // to this packet:
+  //
+  // {
+  //   b) %R25<def> = TFR_cNotPt %P0, %R24
+  //   c) %P0<def> = CMPEQri %R26, 1
+  // }
+  //
+  // On a general check a) and b) are complements, but the
+  // presence of c) will convert a) to .new form, and
+  // then it is not a complement.
+  // We attempt to detect it by analyzing existing
+  // dependencies in the packet.
+
+  // Analyze relationships between all existing members of the packet.
+  // Look for an anti dependency on the same predicate reg
+  // as used in the candidate.
+  for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
+       VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
+
+    // Scheduling unit for the current insn in the packet.
+    SUnit* PacketSU = MIToSUnit[*VIN];
+
+    // If this instruction in the packet is succeeded by the candidate...
+    if (PacketSU->isSucc(SU)) {
+      for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
+        // The corner case exists when there is a true data
+        // dependency between the candidate and one of the current
+        // packet members, this dep is on a predicate reg, and
+        // there already exists an anti dep on the same pred in
+        // the packet.
+        if (PacketSU->Succs[i].getSUnit() == SU &&
+            PacketSU->Succs[i].getKind() == SDep::Data &&
+            Hexagon::PredRegsRegClass.contains(
+              PacketSU->Succs[i].getReg()) &&
+            // Here I know that *VIN is a predicate setting instruction
+            // with a true data dep to the candidate on the register
+            // we care about - c) in the above example.
+            // Now I need to see if there is an anti dependency
+            // from c) to any other instruction in the
+            // same packet on the pred reg of interest.
+            RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(),
+                                        MIToSUnit)) {
+           return false;
+        }
+      }
+    }
+  }
+
+  // If the above case does not apply, check the regular
+  // complement condition.
+  // Check that the predicate register is the same and
+  // that the predicate sense is different.
+  // We also need to differentiate .old vs. .new:
+  // !p0 is not complementary to p0.new.
+  unsigned PReg1 = getPredicatedRegister(MI1, QII);
+  unsigned PReg2 = getPredicatedRegister(MI2, QII);
+  return ((PReg1 == PReg2) &&
+          Hexagon::PredRegsRegClass.contains(PReg1) &&
+          Hexagon::PredRegsRegClass.contains(PReg2) &&
+          (getPredicateSense(MI1, QII) != getPredicateSense(MI2, QII)) &&
+          (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2)));
+}
+
+// initPacketizerState - Initialize packetizer flags.
+void HexagonPacketizerList::initPacketizerState() {
+
+  Dependence = false;
+  PromotedToDotNew = false;
+  GlueToNewValueJump = false;
+  GlueAllocframeStore = false;
+  FoundSequentialDependence = false;
+
+  return;
+}
+
+// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
+                                                    MachineBasicBlock *MBB) {
+  if (MI->isDebugValue())
+    return true;
+
+  // We must print out inline assembly.
+  if (MI->isInlineAsm())
+    return false;
+
+  // We check if MI has any functional units mapped to it.
+  // If it doesn't, we ignore the instruction.
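+  // For instance, target-independent pseudos (IMPLICIT_DEF and the like)
+  // typically map to a sched class with no functional units, so they fall
+  // out here and never occupy a packet slot.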
+  const MCInstrDesc& TID = MI->getDesc();
+  unsigned SchedClass = TID.getSchedClass();
+  const InstrStage* IS =
+                    ResourceTracker->getInstrItins()->beginStage(SchedClass);
+  unsigned FuncUnits = IS->getUnits();
+  return !FuncUnits;
+}
+
+// isSoloInstruction - Returns true for instructions that must be
+// scheduled in their own packet.
+bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) {
+
+  if (MI->isInlineAsm())
+    return true;
+
+  if (MI->isEHLabel())
+    return true;
+
+  // From the Hexagon V4 Programmer's Reference Manual, 3.4.4 Grouping
+  // constraints: trap, pause, barrier, icinva, isync, and syncht are solo
+  // instructions. They must not be grouped with other instructions in a
+  // packet.
+  if (IsSchedBarrier(MI))
+    return true;
+
+  return false;
+}
+
+// isLegalToPacketizeTogether:
+// SUI is the current instruction that is outside of the current packet.
+// SUJ is the current instruction inside the current packet against which
+// SUI will be packetized.
+bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+  MachineInstr *I = SUI->getInstr();
+  MachineInstr *J = SUJ->getInstr();
+  assert(I && J && "Unable to packetize null instruction!");
+
+  const MCInstrDesc &MCIDI = I->getDesc();
+  const MCInstrDesc &MCIDJ = J->getDesc();
+
+  MachineBasicBlock::iterator II = I;
+
+  const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+  const HexagonRegisterInfo* QRI =
+                      (const HexagonRegisterInfo *) TM.getRegisterInfo();
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+  // Inline asm cannot go in the packet.
+  if (I->getOpcode() == Hexagon::INLINEASM)
+    llvm_unreachable("Should not meet inline asm here!");
+
+  if (isSoloInstruction(I))
+    llvm_unreachable("Should not meet solo instr here!");
+
+  // A function call that saves callee-saved registers can only be in a packet
+  // with instructions that don't write to the callee-saved registers.
+  if ((QII->isSaveCalleeSavedRegsCall(I) &&
+       DoesModifyCalleeSavedReg(J, QRI)) ||
+      (QII->isSaveCalleeSavedRegsCall(J) &&
+       DoesModifyCalleeSavedReg(I, QRI))) {
+    Dependence = true;
+    return false;
+  }
+
+  // Two control flow instructions cannot go in the same packet.
+  if (IsControlFlow(I) && IsControlFlow(J)) {
+    Dependence = true;
+    return false;
+  }
+
+  // A LoopN instruction cannot appear in the same packet as a jump or call.
+  if (IsLoopN(I) &&
+     (IsDirectJump(J) || MCIDJ.isCall() || QII->isDeallocRet(J))) {
+    Dependence = true;
+    return false;
+  }
+  if (IsLoopN(J) &&
+     (IsDirectJump(I) || MCIDI.isCall() || QII->isDeallocRet(I))) {
+    Dependence = true;
+    return false;
+  }
+
+  // dealloc_return cannot appear in the same packet as a conditional or
+  // unconditional jump.
+  if (QII->isDeallocRet(I) &&
+     (MCIDJ.isBranch() || MCIDJ.isCall() || MCIDJ.isBarrier())) {
+    Dependence = true;
+    return false;
+  }
+
+
+  // V4 allows dual stores, but does not allow a second store if the
+  // first store is not in SLOT0. New value store, new value jump,
+  // dealloc_return and memop always take SLOT0.
+ // Arch spec 3.4.4.2 + if (QRI->Subtarget.hasV4TOps()) { + if (MCIDI.mayStore() && MCIDJ.mayStore() && + (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) { + Dependence = true; + return false; + } + + if ((QII->isMemOp(J) && MCIDI.mayStore()) + || (MCIDJ.mayStore() && QII->isMemOp(I)) + || (QII->isMemOp(J) && QII->isMemOp(I))) { + Dependence = true; + return false; + } + + //if dealloc_return + if (MCIDJ.mayStore() && QII->isDeallocRet(I)) { + Dependence = true; + return false; + } + + // If an instruction feeds new value jump, glue it. + MachineBasicBlock::iterator NextMII = I; + ++NextMII; + if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) { + MachineInstr *NextMI = NextMII; + + bool secondRegMatch = false; + bool maintainNewValueJump = false; + + if (NextMI->getOperand(1).isReg() && + I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) { + secondRegMatch = true; + maintainNewValueJump = true; + } + + if (!secondRegMatch && + I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) { + maintainNewValueJump = true; + } + + for (std::vector<MachineInstr*>::iterator + VI = CurrentPacketMIs.begin(), + VE = CurrentPacketMIs.end(); + (VI != VE && maintainNewValueJump); ++VI) { + SUnit* PacketSU = MIToSUnit[*VI]; + + // NVJ can not be part of the dual jump - Arch Spec: section 7.8 + if (PacketSU->getInstr()->getDesc().isCall()) { + Dependence = true; + break; + } + // Validate + // 1. Packet does not have a store in it. + // 2. If the first operand of the nvj is newified, and the second + // operand is also a reg, it (second reg) is not defined in + // the same packet. + // 3. If the second operand of the nvj is newified, (which means + // first operand is also a reg), first reg is not defined in + // the same packet. + if (PacketSU->getInstr()->getDesc().mayStore() || + PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME || + // Check #2. + (!secondRegMatch && NextMI->getOperand(1).isReg() && + PacketSU->getInstr()->modifiesRegister( + NextMI->getOperand(1).getReg(), QRI)) || + // Check #3. + (secondRegMatch && + PacketSU->getInstr()->modifiesRegister( + NextMI->getOperand(0).getReg(), QRI))) { + Dependence = true; + break; + } + } + if (!Dependence) + GlueToNewValueJump = true; + else + return false; + } + } + + if (SUJ->isSucc(SUI)) { + for (unsigned i = 0; + (i < SUJ->Succs.size()) && !FoundSequentialDependence; + ++i) { + + if (SUJ->Succs[i].getSUnit() != SUI) { + continue; + } + + SDep::Kind DepType = SUJ->Succs[i].getKind(); + + // For direct calls: + // Ignore register dependences for call instructions for + // packetization purposes except for those due to r31 and + // predicate registers. + // + // For indirect calls: + // Same as direct calls + check for true dependences to the register + // used in the indirect call. + // + // We completely ignore Order dependences for call instructions + // + // For returns: + // Ignore register dependences for return instructions like jumpr, + // dealloc return unless we have dependencies on the explicit uses + // of the registers used by jumpr (like r31) or dealloc return + // (like r29 or r30). + // + // TODO: Currently, jumpr is handling only return of r31. So, the + // following logic (specificaly IsCallDependent) is working fine. + // We need to enable jumpr for register other than r31 and then, + // we need to rework the last part, where it handles indirect call + // of that (IsCallDependent) function. Bug 6216 is opened for this. 
+      //
+      unsigned DepReg = 0;
+      const TargetRegisterClass* RC = NULL;
+      if (DepType == SDep::Data) {
+        DepReg = SUJ->Succs[i].getReg();
+        RC = QRI->getMinimalPhysRegClass(DepReg);
+      }
+      if ((MCIDI.isCall() || MCIDI.isReturn()) &&
+          (!IsRegDependence(DepType) ||
+           !IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) {
+        /* do nothing */
+      }
+
+      // For instructions that can be promoted to dot-new, try to promote.
+      else if ((DepType == SDep::Data) &&
+               CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) &&
+               PromoteToDotNew(I, DepType, II, RC)) {
+        PromotedToDotNew = true;
+        /* do nothing */
+      }
+
+      else if ((DepType == SDep::Data) &&
+               (QII->isNewValueJump(I))) {
+        /* do nothing */
+      }
+
+      // For predicated instructions, if the predicates are complements
+      // then there can be no dependence.
+      else if (QII->isPredicated(I) &&
+               QII->isPredicated(J) &&
+               ArePredicatesComplements(I, J, MIToSUnit)) {
+        /* do nothing */
+
+      }
+      else if (IsDirectJump(I) &&
+               !MCIDJ.isBranch() &&
+               !MCIDJ.isCall() &&
+               (DepType == SDep::Order)) {
+        // Ignore Order dependences between unconditional direct branches
+        // and non-control-flow instructions.
+        /* do nothing */
+      }
+      else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) &&
+               (DepType != SDep::Output)) {
+        // Ignore all dependences for jumps except for true and output
+        // dependences.
+        /* do nothing */
+      }
+
+      // Ignore output dependences due to superregs. We can
+      // write to two different subregisters of R1:0 for instance
+      // in the same cycle.
+      //
+      // If neither I nor J defines DepReg, then this is a
+      // superfluous output dependence. The dependence must be of the
+      // form:
+      //   R0 = ...
+      //   R1 = ...
+      // and there is an output dependence between the two instructions
+      // with
+      //   DepReg = D0
+      // We want to ignore these dependences.
+      // Ideally, the dependence constructor should annotate such
+      // dependences. We can then avoid this relatively expensive check.
+      //
+      else if (DepType == SDep::Output) {
+        // DepReg is the register that's responsible for the dependence.
+        unsigned DepReg = SUJ->Succs[i].getReg();
+
+        // Check if I or J really defines DepReg.
+        if (I->definesRegister(DepReg) ||
+            J->definesRegister(DepReg)) {
+          FoundSequentialDependence = true;
+          break;
+        }
+      }
+
+      // We ignore Order dependences for
+      // 1. Two loads unless they are volatile.
+      // 2. Two stores in V4 unless they are volatile.
+      else if ((DepType == SDep::Order) &&
+               !I->hasOrderedMemoryRef() &&
+               !J->hasOrderedMemoryRef()) {
+        if (QRI->Subtarget.hasV4TOps() &&
+            // hexagonv4 allows dual stores.
+            MCIDI.mayStore() && MCIDJ.mayStore()) {
+          /* do nothing */
+        }
+        // store followed by store -- not OK on V2
+        // store followed by load -- not OK on all (OK if addresses
+        // are not aliased)
+        // load followed by store -- OK on all
+        // load followed by load -- OK on all
+        else if ( !MCIDJ.mayStore()) {
+          /* do nothing */
+        }
+        else {
+          FoundSequentialDependence = true;
+          break;
+        }
+      }
+
+      // For V4, special case ALLOCFRAME. Even though there is a dependency
+      // between ALLOCFRAME and a subsequent store, allow it to be
+      // packetized in the same packet. This implies that the store is using
+      // the caller's SP. Hence, the offset needs to be updated accordingly.
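+      // Sketch: with FrameSize = 16, the pair
+      //   allocframe(#16)
+      //   memw(r29 + #20) = r3          // offset relative to the new SP
+      // can be packetized as
+      //   { allocframe(#16); memw(r29 + #-4) = r3 }   // 20 - (16 + 8)
+      // since the glued store still sees the caller's SP.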
+ else if (DepType == SDep::Data + && QRI->Subtarget.hasV4TOps() + && J->getOpcode() == Hexagon::ALLOCFRAME + && (I->getOpcode() == Hexagon::STrid + || I->getOpcode() == Hexagon::STriw + || I->getOpcode() == Hexagon::STrib) + && I->getOperand(0).getReg() == QRI->getStackRegister() + && QII->isValidOffset(I->getOpcode(), + I->getOperand(1).getImm() - + (FrameSize + HEXAGON_LRFP_SIZE))) + { + GlueAllocframeStore = true; + // Since this store is to be glued with allocframe in the same + // packet, it will use SP of the previous stack frame, i.e + // caller's SP. Therefore, we need to recalculate offset according + // to this change. + I->getOperand(1).setImm(I->getOperand(1).getImm() - + (FrameSize + HEXAGON_LRFP_SIZE)); + } + + // + // Skip over anti-dependences. Two instructions that are + // anti-dependent can share a packet + // + else if (DepType != SDep::Anti) { + FoundSequentialDependence = true; + break; + } + } + + if (FoundSequentialDependence) { + Dependence = true; + return false; + } + } + + return true; +} + +// isLegalToPruneDependencies +bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { + MachineInstr *I = SUI->getInstr(); + assert(I && SUJ->getInstr() && "Unable to packetize null instruction!"); + + const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + + if (Dependence) { + + // Check if the instruction was promoted to a dot-new. If so, demote it + // back into a dot-old. + if (PromotedToDotNew) { + DemoteToDotOld(I); + } + + // Check if the instruction (must be a store) was glued with an Allocframe + // instruction. If so, restore its offset to its original value, i.e. use + // curent SP instead of caller's SP. + if (GlueAllocframeStore) { + I->getOperand(1).setImm(I->getOperand(1).getImm() + + FrameSize + HEXAGON_LRFP_SIZE); + } + + return false; + } + return true; +} + +MachineBasicBlock::iterator +HexagonPacketizerList::addToPacket(MachineInstr *MI) { + + MachineBasicBlock::iterator MII = MI; + MachineBasicBlock *MBB = MI->getParent(); + + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + + if (GlueToNewValueJump) { + + ++MII; + MachineInstr *nvjMI = MII; + assert(ResourceTracker->canReserveResources(MI)); + ResourceTracker->reserveResources(MI); + if ((QII->isExtended(MI) || QII->isConstExtended(MI)) && + !tryAllocateResourcesForConstExt(MI)) { + endPacket(MBB, MI); + ResourceTracker->reserveResources(MI); + assert(canReserveResourcesForConstExt(MI) && + "Ensure that there is a slot"); + reserveResourcesForConstExt(MI); + // Reserve resources for new value jump constant extender. + assert(canReserveResourcesForConstExt(MI) && + "Ensure that there is a slot"); + reserveResourcesForConstExt(nvjMI); + assert(ResourceTracker->canReserveResources(nvjMI) && + "Ensure that there is a slot"); + + } else if ( // Extended instruction takes two slots in the packet. + // Try reserve and allocate 4-byte in the current packet first. + (QII->isExtended(nvjMI) + && (!tryAllocateResourcesForConstExt(nvjMI) + || !ResourceTracker->canReserveResources(nvjMI))) + || // For non-extended instruction, no need to allocate extra 4 bytes. + (!QII->isExtended(nvjMI) && + !ResourceTracker->canReserveResources(nvjMI))) + { + endPacket(MBB, MI); + // A new and empty packet starts. + // We are sure that the resources requirements can be satisfied. + // Therefore, do not need to call "canReserveResources" anymore. 
+ ResourceTracker->reserveResources(MI); + if (QII->isExtended(nvjMI)) + reserveResourcesForConstExt(nvjMI); + } + // Here, we are sure that "reserveResources" would succeed. + ResourceTracker->reserveResources(nvjMI); + CurrentPacketMIs.push_back(MI); + CurrentPacketMIs.push_back(nvjMI); + } else { + if ( (QII->isExtended(MI) || QII->isConstExtended(MI)) + && ( !tryAllocateResourcesForConstExt(MI) + || !ResourceTracker->canReserveResources(MI))) + { + endPacket(MBB, MI); + // Check if the instruction was promoted to a dot-new. If so, demote it + // back into a dot-old + if (PromotedToDotNew) { + DemoteToDotOld(MI); + } + reserveResourcesForConstExt(MI); + } + // In case that "MI" is not an extended insn, + // the resource availability has already been checked. + ResourceTracker->reserveResources(MI); + CurrentPacketMIs.push_back(MI); + } + return MII; +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonPacketizer() { + return new HexagonPacketizer(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h new file mode 100644 index 000000000000..c607b5d35649 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h @@ -0,0 +1,141 @@ +//===-- HexagonVarargsCallingConvention.h - Calling Conventions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the functions that assign locations to outgoing function +// arguments. 
Adapted from the target independent version but this handles +// calls to varargs functions +// +//===----------------------------------------------------------------------===// +// + + + + +static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, + Hexagon_CCState &State, + int NonVarArgsParams, + int CurrentParam, + bool ForceMem); + + +static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, + Hexagon_CCState &State, + int NonVarArgsParams, + int CurrentParam, + bool ForceMem) { + unsigned ByValSize = 0; + if (ArgFlags.isByVal() && + ((ByValSize = ArgFlags.getByValSize()) > + (MVT(MVT::i64).getSizeInBits() / 8))) { + ForceMem = true; + } + + + // Only assign registers for named (non varargs) arguments + if ( !ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <= + NonVarArgsParams))) { + + if (LocVT == MVT::i32 || + LocVT == MVT::i16 || + LocVT == MVT::i8 || + LocVT == MVT::f32) { + static const unsigned RegList1[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5 + }; + if (unsigned Reg = State.AllocateReg(RegList1, 6)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg, + LocVT.getSimpleVT(), LocInfo)); + return false; + } + } + + if (LocVT == MVT::i64 || + LocVT == MVT::f64) { + static const unsigned RegList2[] = { + Hexagon::D0, Hexagon::D1, Hexagon::D2 + }; + if (unsigned Reg = State.AllocateReg(RegList2, 3)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg, + LocVT.getSimpleVT(), LocInfo)); + return false; + } + } + } + + const Type* ArgTy = LocVT.getTypeForEVT(State.getContext()); + unsigned Alignment = + State.getTarget().getDataLayout()->getABITypeAlignment(ArgTy); + unsigned Size = + State.getTarget().getDataLayout()->getTypeSizeInBits(ArgTy) / 8; + + // If it's passed by value, then we need the size of the aggregate not of + // the pointer. + if (ArgFlags.isByVal()) { + Size = ByValSize; + + // Hexagon_TODO: Get the alignment of the contained type here. 
+    Alignment = 8;
+  }
+
+  unsigned Offset3 = State.AllocateStack(Size, Alignment);
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+                                   LocVT.getSimpleVT(), LocInfo));
+  return false;
+}
+
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+                                    EVT LocVT, CCValAssign::LocInfo LocInfo,
+                                    ISD::ArgFlagsTy ArgFlags,
+                                    Hexagon_CCState &State,
+                                    int NonVarArgsParams,
+                                    int CurrentParam,
+                                    bool ForceMem) {
+
+  if (LocVT == MVT::i32 ||
+      LocVT == MVT::f32) {
+    static const unsigned RegList1[] = {
+      Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+      Hexagon::R5
+    };
+    if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+                                       LocVT.getSimpleVT(), LocInfo));
+      return false;
+    }
+  }
+
+  if (LocVT == MVT::i64 ||
+      LocVT == MVT::f64) {
+    static const unsigned RegList2[] = {
+      Hexagon::D0, Hexagon::D1, Hexagon::D2
+    };
+    if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+                                       LocVT.getSimpleVT(), LocInfo));
+      return false;
+    }
+  }
+
+  const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+  unsigned Alignment =
+    State.getTarget().getDataLayout()->getABITypeAlignment(ArgTy);
+  unsigned Size =
+    State.getTarget().getDataLayout()->getTypeSizeInBits(ArgTy) / 8;
+
+  unsigned Offset3 = State.AllocateStack(Size, Alignment);
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+                                   LocVT.getSimpleVT(), LocInfo));
+  return false;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
new file mode 100644
index 000000000000..7c41507ede74
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -0,0 +1,203 @@
+//===- HexagonInstPrinter.cpp - Convert Hexagon MCInst to assembly syntax -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Hexagon MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "HexagonAsmPrinter.h"
+#include "Hexagon.h"
+#include "HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "HexagonGenAsmWriter.inc"
+
+const char HexagonInstPrinter::PacketPadding = '\t';
+
+StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
+  return MII.getName(Opcode);
+}
+
+StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
+  return getRegisterName(RegNo);
+}
+
+void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                   StringRef Annot) {
+  printInst((const HexagonMCInst*)(MI), O, Annot);
+}
+
+void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O,
+                                   StringRef Annot) {
+  const char startPacket = '{',
+             endPacket = '}';
+  // TODO: add outer HW loop when it's supported too.
+  if (MI->getOpcode() == Hexagon::ENDLOOP0) {
+    // Ending a hardware loop is different from ending a regular packet.
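+    // For illustration (example output, not from the original source): a
+    // single-instruction loop body would be expected to print roughly as
+    //   {
+    //     r0 = add(r0, #1)
+    //   } :endloop0
+    // with the loop-end marker following the closing brace.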
+ assert(MI->isPacketEnd() && "Loop-end must also end the packet"); + + if (MI->isPacketStart()) { + // There must be a packet to end a loop. + // FIXME: when shuffling is always run, this shouldn't be needed. + HexagonMCInst Nop; + StringRef NoAnnot; + + Nop.setOpcode (Hexagon::NOP); + Nop.setPacketStart (MI->isPacketStart()); + printInst (&Nop, O, NoAnnot); + } + + // Close the packet. + if (MI->isPacketEnd()) + O << PacketPadding << endPacket; + + printInstruction(MI, O); + } + else { + // Prefix the insn opening the packet. + if (MI->isPacketStart()) + O << PacketPadding << startPacket << '\n'; + + printInstruction(MI, O); + + // Suffix the insn closing the packet. + if (MI->isPacketEnd()) + // Suffix the packet in a new line always, since the GNU assembler has + // issues with a closing brace on the same line as CONST{32,64}. + O << '\n' << PacketPadding << endPacket; + } + + printAnnotation(O, Annot); +} + +void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand& MO = MI->getOperand(OpNo); + + if (MO.isReg()) { + O << getRegisterName(MO.getReg()); + } else if(MO.isExpr()) { + O << *MO.getExpr(); + } else if(MO.isImm()) { + printImmOperand(MI, OpNo, O); + } else { + llvm_unreachable("Unknown operand"); + } +} + +void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand& MO = MI->getOperand(OpNo); + + if(MO.isExpr()) { + O << *MO.getExpr(); + } else if(MO.isImm()) { + O << MI->getOperand(OpNo).getImm(); + } else { + llvm_unreachable("Unknown operand"); + } +} + +void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const HexagonMCInst *HMCI = static_cast<const HexagonMCInst*>(MI); + if (HMCI->isConstExtended()) + O << "#"; + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI, + unsigned OpNo, raw_ostream &O) const { + O << MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + O << -MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printNOneImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + O << -1; +} + +void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand& MO0 = MI->getOperand(OpNo); + const MCOperand& MO1 = MI->getOperand(OpNo + 1); + + O << getRegisterName(MO0.getReg()); + O << " + #" << MO1.getImm(); +} + +void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand& MO0 = MI->getOperand(OpNo); + const MCOperand& MO1 = MI->getOperand(OpNo + 1); + + O << getRegisterName(MO0.getReg()) << ", #" << MO1.getImm(); +} + +void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printJumpTable(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printConstantPool(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + // 
Branches can take an immediate operand. This is used by the branch + // selection pass to print $+8, an eight byte displacement from the PC. + llvm_unreachable("Unknown branch operand."); +} + +void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { +} + +void HexagonInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { +} + +void HexagonInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { +} + +void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo, + raw_ostream &O, bool hi) const { + assert(MI->getOperand(OpNo).isImm() && "Unknown symbol operand"); + + O << '#' << (hi ? "HI" : "LO") << "(#"; + printOperand(MI, OpNo, O); + O << ')'; +} diff --git a/contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h new file mode 100644 index 000000000000..d0cef683da95 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h @@ -0,0 +1,87 @@ +//===-- HexagonInstPrinter.h - Convert Hexagon MCInst to assembly syntax --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an Hexagon MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONINSTPRINTER_H +#define HEXAGONINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" + +namespace llvm { + class HexagonMCInst; + + class HexagonInstPrinter : public MCInstPrinter { + public: + explicit HexagonInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI), MII(MII) {} + + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot); + virtual StringRef getOpcodeName(unsigned Opcode) const; + void printInstruction(const MCInst *MI, raw_ostream &O); + StringRef getRegName(unsigned RegNo) const; + static const char *getRegisterName(unsigned RegNo); + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + void printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const; + void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printFrameIndexOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const; + void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + + void printConstantPool(const MCInst *MI, unsigned OpNo, + 
raw_ostream &O) const; + + void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O) const + { printSymbol(MI, OpNo, O, true); } + void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const + { printSymbol(MI, OpNo, O, false); } + + const MCInstrInfo &getMII() const { + return MII; + } + + protected: + void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi) + const; + + static const char PacketPadding; + + private: + const MCInstrInfo &MII; + + }; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h new file mode 100644 index 000000000000..8519cf314e6f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -0,0 +1,184 @@ +//===-- HexagonBaseInfo.h - Top level definitions for Hexagon --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the Hexagon target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONBASEINFO_H +#define HEXAGONBASEINFO_H + +#include "HexagonMCTargetDesc.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +/// HexagonII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace HexagonII { + // *** The code below must match HexagonInstrFormat*.td *** // + + // Insn types. + // *** Must match HexagonInstrFormat*.td *** + enum Type { + TypePSEUDO = 0, + TypeALU32 = 1, + TypeCR = 2, + TypeJR = 3, + TypeJ = 4, + TypeLD = 5, + TypeST = 6, + TypeSYSTEM = 7, + TypeXTYPE = 8, + TypeMEMOP = 9, + TypeNV = 10, + TypePREFIX = 30, // Such as extenders. + TypeENDLOOP = 31 // Such as end of a HW loop. + }; + + enum SubTarget { + HasV2SubT = 0xf, + HasV2SubTOnly = 0x1, + NoV2SubT = 0x0, + HasV3SubT = 0xe, + HasV3SubTOnly = 0x2, + NoV3SubT = 0x1, + HasV4SubT = 0xc, + NoV4SubT = 0x3, + HasV5SubT = 0x8, + NoV5SubT = 0x7 + }; + + enum AddrMode { + NoAddrMode = 0, // No addressing mode + Absolute = 1, // Absolute addressing mode + AbsoluteSet = 2, // Absolute set addressing mode + BaseImmOffset = 3, // Indirect with offset + BaseLongOffset = 4, // Indirect with long offset + BaseRegOffset = 5, // Indirect with register offset + PostInc = 6 // Post increment addressing mode + }; + + enum MemAccessSize { + NoMemAccess = 0, // Not a memory acces instruction. + ByteAccess = 1, // Byte access instruction (memb). + HalfWordAccess = 2, // Half word access instruction (memh). + WordAccess = 3, // Word access instruction (memw). + DoubleWordAccess = 4 // Double word access instruction (memd) + }; + + // MCInstrDesc TSFlags + // *** Must match HexagonInstrFormat*.td *** + enum { + // This 5-bit field describes the insn type. + TypePos = 0, + TypeMask = 0x1f, + + // Solo instructions. + SoloPos = 5, + SoloMask = 0x1, + + // Predicated instructions. + PredicatedPos = 6, + PredicatedMask = 0x1, + PredicatedFalsePos = 7, + PredicatedFalseMask = 0x1, + PredicatedNewPos = 8, + PredicatedNewMask = 0x1, + + // New-Value consumer instructions. 
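+    // Each (Pos, Mask) pair in this enum is read out of
+    // MCInstrDesc::TSFlags; for instance, HexagonMCInst tests
+    //   (TSFlags >> NewValuePos) & NewValueMask
+    // to decide whether an insn consumes a new value.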
+ NewValuePos = 9, + NewValueMask = 0x1, + + // New-Value producer instructions. + hasNewValuePos = 10, + hasNewValueMask = 0x1, + + // Which operand consumes or produces a new value. + NewValueOpPos = 11, + NewValueOpMask = 0x7, + + // Which bits encode the new value. + NewValueBitsPos = 14, + NewValueBitsMask = 0x3, + + // Stores that can become new-value stores. + mayNVStorePos = 16, + mayNVStoreMask = 0x1, + + // New-value store instructions. + NVStorePos = 17, + NVStoreMask = 0x1, + + // Extendable insns. + ExtendablePos = 18, + ExtendableMask = 0x1, + + // Insns must be extended. + ExtendedPos = 19, + ExtendedMask = 0x1, + + // Which operand may be extended. + ExtendableOpPos = 20, + ExtendableOpMask = 0x7, + + // Signed or unsigned range. + ExtentSignedPos = 23, + ExtentSignedMask = 0x1, + + // Number of bits of range before extending operand. + ExtentBitsPos = 24, + ExtentBitsMask = 0x1f, + + // Valid subtargets + validSubTargetPos = 29, + validSubTargetMask = 0xf, + + // Addressing mode for load/store instructions. + AddrModePos = 33, + AddrModeMask = 0x7, + + // Access size of memory access instructions (load/store). + MemAccessSizePos = 36, + MemAccesSizeMask = 0x7 + }; + + // *** The code above must match HexagonInstrFormat*.td *** // + + // Hexagon specific MO operand flag mask. + enum HexagonMOTargetFlagVal { + //===------------------------------------------------------------------===// + // Hexagon Specific MachineOperand flags. + MO_NO_FLAG, + + HMOTF_ConstExtended = 1, + + /// MO_PCREL - On a symbol operand, indicates a PC-relative relocation + /// Used for computing a global address for PIC compilations + MO_PCREL, + + /// MO_GOT - Indicates a GOT-relative relocation + MO_GOT, + + // Low or high part of a symbol. + MO_LO16, MO_HI16, + + // Offset from the base of the SDA. + MO_GPREL + }; + +} // End namespace HexagonII. + +} // End namespace llvm. + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp new file mode 100644 index 000000000000..3f9415b94df9 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -0,0 +1,39 @@ +//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the HexagonMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCAsmInfo.h" + +using namespace llvm; + +// Pin the vtable to this file. +void HexagonMCAsmInfo::anchor() {} + +HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) { + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = 0; // .xword is only supported by V9. 
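+  // With no 64-bit directive set, 64-bit data is presumably emitted as two
+  // 32-bit words, e.g. a 64-bit zero becomes:
+  //   .word 0
+  //   .word 0
+  // (The .xword remark above appears to be inherited from the Sparc
+  // backend.)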
+  CommentString = "//";
+  HasLEB128 = true;
+
+  PrivateGlobalPrefix = ".L";
+  LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
+  InlineAsmStart = "# InlineAsm Start";
+  InlineAsmEnd = "# InlineAsm End";
+  ZeroDirective = "\t.space\t";
+  AscizDirective = "\t.string\t";
+
+  SupportsDebugInformation = true;
+  UsesELFSectionDirectiveForBSS = true;
+  ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
new file mode 100644
index 000000000000..bd8cb7637af7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -0,0 +1,29 @@
+//===-- HexagonMCAsmInfo.h - Hexagon asm properties ------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the HexagonMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMCASMINFO_H
+#define HexagonMCASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfoELF.h"
+
+namespace llvm {
+  class HexagonMCAsmInfo : public MCAsmInfoELF {
+    virtual void anchor();
+  public:
+    explicit HexagonMCAsmInfo(StringRef TT);
+  };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
new file mode 100644
index 000000000000..9260b4a27661
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
@@ -0,0 +1,175 @@
+//===- HexagonMCInst.cpp - Hexagon sub-class of MCInst --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some Hexagon VLIW annotations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+
+using namespace llvm;
+
+// Return the slots used by the insn.
+unsigned HexagonMCInst::getUnits(const HexagonTargetMachine* TM) const {
+  const HexagonInstrInfo* QII = TM->getInstrInfo();
+  const InstrItineraryData* II = TM->getInstrItineraryData();
+  const InstrStage *IS =
+    II->beginStage(QII->get(this->getOpcode()).getSchedClass());
+
+  return (IS->getUnits());
+}
+
+// Return the Hexagon ISA class for the insn.
+unsigned HexagonMCInst::getType() const {
+  const uint64_t F = MCID->TSFlags;
+
+  return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+}
+
+// Return whether the insn is an actual insn.
+bool HexagonMCInst::isCanon() const {
+  return (!MCID->isPseudo() &&
+          !isPrefix() &&
+          getType() != HexagonII::TypeENDLOOP);
+}
+
+// Return whether the insn is a prefix.
+bool HexagonMCInst::isPrefix() const {
+  return (getType() == HexagonII::TypePREFIX);
+}
+
+// Return whether the insn is solo, i.e., cannot be in a packet.
+bool HexagonMCInst::isSolo() const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask); +} + +// Return whether the insn is a new-value consumer. +bool HexagonMCInst::isNewValue() const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + +// Return whether the instruction is a legal new-value producer. +bool HexagonMCInst::hasNewValue() const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); +} + +// Return the operand that consumes or produces a new value. +const MCOperand& HexagonMCInst::getNewValue() const { + const uint64_t F = MCID->TSFlags; + const unsigned O = (F >> HexagonII::NewValueOpPos) & + HexagonII::NewValueOpMask; + const MCOperand& MCO = getOperand(O); + + assert ((isNewValue() || hasNewValue()) && MCO.isReg()); + return (MCO); +} + +// Return whether the instruction needs to be constant extended. +// 1) Always return true if the instruction has 'isExtended' flag set. +// +// isExtendable: +// 2) For immediate extended operands, return true only if the value is +// out-of-range. +// 3) For global address, always return true. + +bool HexagonMCInst::isConstExtended(void) const { + if (isExtended()) + return true; + + if (!isExtendable()) + return false; + + short ExtOpNum = getCExtOpNum(); + int MinValue = getMinValue(); + int MaxValue = getMaxValue(); + const MCOperand& MO = getOperand(ExtOpNum); + + // We could be using an instruction with an extendable immediate and shoehorn + // a global address into it. If it is a global address it will be constant + // extended. We do this for COMBINE. + // We currently only handle isGlobal() because it is the only kind of + // object we are going to end up with here for now. + // In the future we probably should add isSymbol(), etc. + if (MO.isExpr()) + return true; + + // If the extendable operand is not 'Immediate' type, the instruction should + // have 'isExtended' flag set. + assert(MO.isImm() && "Extendable operand must be Immediate type"); + + int ImmValue = MO.getImm(); + return (ImmValue < MinValue || ImmValue > MaxValue); +} + +// Return whether the instruction must be always extended. +bool HexagonMCInst::isExtended(void) const { + const uint64_t F = MCID->TSFlags; + return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; +} + +// Return true if the instruction may be extended based on the operand value. +bool HexagonMCInst::isExtendable(void) const { + const uint64_t F = MCID->TSFlags; + return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; +} + +// Return number of bits in the constant extended operand. +unsigned HexagonMCInst::getBitCount(void) const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); +} + +// Return constant extended operand number. +unsigned short HexagonMCInst::getCExtOpNum(void) const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); +} + +// Return whether the operand can be constant extended. +bool HexagonMCInst::isOperandExtended(const unsigned short OperandNum) const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) + == OperandNum; +} + +// Return the min value that a constant extendable operand can have +// without being extended. 
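+// (For instance, a signed extendable field with ExtentBits == 8 spans
+// [-128, 127]; immediates outside that range need a constant extender.)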
+int HexagonMCInst::getMinValue(void) const { + const uint64_t F = MCID->TSFlags; + unsigned isSigned = (F >> HexagonII::ExtentSignedPos) + & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) + & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return -1 << (bits - 1); + else + return 0; +} + +// Return the max value that a constant extendable operand can have +// without being extended. +int HexagonMCInst::getMaxValue(void) const { + const uint64_t F = MCID->TSFlags; + unsigned isSigned = (F >> HexagonII::ExtentSignedPos) + & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) + & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return ~(-1 << (bits - 1)); + else + return ~(-1 << bits); +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h new file mode 100644 index 000000000000..3ca71f00b241 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h @@ -0,0 +1,100 @@ +//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class extends MCInst to allow some VLIW annotations. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCINST_H +#define HEXAGONMCINST_H + +#include "HexagonTargetMachine.h" +#include "llvm/MC/MCInst.h" + +namespace llvm { + class MCOperand; + + class HexagonMCInst: public MCInst { + // MCID is set during instruction lowering. + // It is needed in order to access TSFlags for + // use in checking MC instruction properties. + const MCInstrDesc *MCID; + + // Packet start and end markers + unsigned packetStart: 1, packetEnd: 1; + + public: + explicit HexagonMCInst(): + MCInst(), MCID(0), packetStart(0), packetEnd(0) {}; + HexagonMCInst(const MCInstrDesc& mcid): + MCInst(), MCID(&mcid), packetStart(0), packetEnd(0) {}; + + bool isPacketStart() const { return (packetStart); }; + bool isPacketEnd() const { return (packetEnd); }; + void setPacketStart(bool Y) { packetStart = Y; }; + void setPacketEnd(bool Y) { packetEnd = Y; }; + void resetPacket() { setPacketStart(false); setPacketEnd(false); }; + + // Return the slots used by the insn. + unsigned getUnits(const HexagonTargetMachine* TM) const; + + // Return the Hexagon ISA class for the insn. + unsigned getType() const; + + void setDesc(const MCInstrDesc& mcid) { MCID = &mcid; }; + const MCInstrDesc& getDesc(void) const { return *MCID; }; + + // Return whether the insn is an actual insn. + bool isCanon() const; + + // Return whether the insn is a prefix. + bool isPrefix() const; + + // Return whether the insn is solo, i.e., cannot be in a packet. + bool isSolo() const; + + // Return whether the instruction needs to be constant extended. + bool isConstExtended() const; + + // Return constant extended operand number. + unsigned short getCExtOpNum(void) const; + + // Return whether the insn is a new-value consumer. + bool isNewValue() const; + + // Return whether the instruction is a legal new-value producer. + bool hasNewValue() const; + + // Return the operand that consumes or produces a new value. 
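+    // (For example, in the new-value store "memw(r30+#0) = r2.new" this
+    //  would be the r2.new operand, whose value is produced earlier in
+    //  the same packet.)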
+ const MCOperand& getNewValue() const; + + // Return number of bits in the constant extended operand. + unsigned getBitCount(void) const; + + private: + // Return whether the instruction must be always extended. + bool isExtended() const; + + // Return true if the insn may be extended based on the operand value. + bool isExtendable() const; + + // Return true if the operand can be constant extended. + bool isOperandExtended(const unsigned short OperandNum) const; + + // Return the min value that a constant extendable operand can have + // without being extended. + int getMinValue() const; + + // Return the max value that a constant extendable operand can have + // without being extended. + int getMaxValue() const; + }; +} + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp new file mode 100644 index 000000000000..2f93a5299c91 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -0,0 +1,98 @@ +//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Hexagon specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCTargetDesc.h" +#include "HexagonMCAsmInfo.h" +#include "InstPrinter/HexagonInstPrinter.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "HexagonGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "HexagonGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "HexagonGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createHexagonMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitHexagonMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitHexagonMCRegisterInfo(X, Hexagon::R0); + return X; +} + +static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT, + StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitHexagonMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { + MCAsmInfo *MAI = new HexagonMCAsmInfo(TT); + + // VirtualFP = (R30 + #0). + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa( + 0, Hexagon::R30, 0); + MAI->addInitialFrameState(Inst); + + return MAI; +} + +static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + // For the time being, use static relocations, since there's really no + // support for PIC yet. + X->InitMCCodeGenInfo(Reloc::Static, CM, OL); + return X; +} + +// Force static initialization. +extern "C" void LLVMInitializeHexagonTargetMC() { + // Register the MC asm info. 
+  RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo);
+
+  // Register the MC codegen info.
+  TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget,
+                                        createHexagonMCCodeGenInfo);
+
+  // Register the MC instruction info.
+  TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget,
+                                      createHexagonMCInstrInfo);
+
+  // Register the MC register info.
+  TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
+                                    createHexagonMCRegisterInfo);
+
+  // Register the MC subtarget info.
+  TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget,
+                                          createHexagonMCSubtargetInfo);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
new file mode 100644
index 000000000000..2238b1ae5f35
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -0,0 +1,39 @@
+//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCTARGETDESC_H
+#define HEXAGONMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+
+extern Target TheHexagonTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Hexagon registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "HexagonGenRegisterInfo.inc"
+
+// Defines symbolic names for the Hexagon instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "HexagonGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
new file mode 100644
index 000000000000..40f6c8d23ea8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheHexagonTarget;
+
+extern "C" void LLVMInitializeHexagonTargetInfo() {
+  RegisterTarget<Triple::hexagon, /*HasJIT=*/false>
+    X(TheHexagonTarget, "hexagon", "Hexagon");
+}
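+
+// Illustrative (assumed) usage, not part of the imported sources: a client
+// tool would typically run these entry points before constructing a
+// TargetMachine, e.g.
+//   LLVMInitializeHexagonTargetInfo();
+//   LLVMInitializeHexagonTargetMC();
+//   std::string Error;
+//   const Target *T = TargetRegistry::lookupTarget("hexagon", Error);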