author:    Dimitry Andric <dim@FreeBSD.org>  2015-05-27 18:44:32 +0000
committer: Dimitry Andric <dim@FreeBSD.org>  2015-05-27 18:44:32 +0000
commit:    5a5ac124e1efaf208671f01c46edb15f29ed2a0b (patch)
tree:      a6140557876943cdd800ee997c9317283394b22c /lib/Target/Hexagon
parent:    f03b5bed27d0d2eafd68562ce14f8b5e3f1f0801 (diff)
Vendor import of llvm trunk r238337 (tag: vendor/llvm/llvm-trunk-r238337)
Notes:
svn path=/vendor/llvm/dist/; revision=283625
svn path=/vendor/llvm/llvm-trunk-r238337/; revision=283626; tag=vendor/llvm/llvm-trunk-r238337
Diffstat (limited to 'lib/Target/Hexagon')
69 files changed, 13813 insertions, 12301 deletions
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index af7914f30366..758ccc741007 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -13,9 +13,9 @@ add_public_tablegen_target(HexagonCommonTableGen) add_llvm_target(HexagonCodeGen HexagonAsmPrinter.cpp - HexagonCallingConvLower.cpp HexagonCFGOptimizer.cpp HexagonCopyToCombine.cpp + HexagonExpandCondsets.cpp HexagonExpandPredSpillCode.cpp HexagonFixupHwLoops.cpp HexagonFrameLowering.cpp @@ -32,7 +32,6 @@ add_llvm_target(HexagonCodeGen HexagonRemoveSZExtArgs.cpp HexagonSelectionDAGInfo.cpp HexagonSplitConst32AndConst64.cpp - HexagonSplitTFRCondSets.cpp HexagonSubtarget.cpp HexagonTargetMachine.cpp HexagonTargetObjectFile.cpp diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 44f9d93e8fc7..a60d1e471944 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCInst.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" @@ -51,6 +51,8 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, void const *Decoder); static const uint16_t IntRegDecoderTable[] = { Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, @@ -67,7 +69,7 @@ Hexagon::P2, Hexagon::P3 }; static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, const uint16_t Table[], size_t Size) { if (RegNo < Size) { - Inst.addOperand(MCOperand::CreateReg(Table[RegNo])); + Inst.addOperand(MCOperand::createReg(Table[RegNo])); return MCDisassembler::Success; } else @@ -81,7 +83,7 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Fail; unsigned Register = IntRegDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return MCDisassembler::Success; } @@ -101,7 +103,31 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Fail; unsigned Register = CtrlRegDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, void const *Decoder) { + static const uint16_t CtrlReg64DecoderTable[] = { + Hexagon::C1_0, Hexagon::NoRegister, + Hexagon::C3_2, Hexagon::NoRegister, + Hexagon::NoRegister, Hexagon::NoRegister, + Hexagon::C7_6, Hexagon::NoRegister, + Hexagon::C9_8, Hexagon::NoRegister, + Hexagon::C11_10, Hexagon::NoRegister, + Hexagon::CS, Hexagon::NoRegister, + Hexagon::UPC, Hexagon::NoRegister + }; + + if (RegNo >= sizeof(CtrlReg64DecoderTable) / sizeof(CtrlReg64DecoderTable[0])) + return MCDisassembler::Fail; + + if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = 
CtrlReg64DecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); return MCDisassembler::Success; } @@ -118,7 +144,7 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, default: return MCDisassembler::Fail; } - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return MCDisassembler::Success; } @@ -143,7 +169,7 @@ static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Fail; unsigned Register = PredRegDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return MCDisassembler::Success; } @@ -176,6 +202,6 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // Remove parse bits. insn &= ~static_cast<uint32_t>(HexagonII::InstParseBits::INST_PARSE_MASK); DecodeStatus Result = decodeInstruction(DecoderTable32, MI, insn, Address, this, STI); - HexagonMCInst::AppendImplicitOperands(MI); + HexagonMCInstrInfo::AppendImplicitOperands(MI); return Result; } diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index 64ae69c60e5d..dfe79f9ff7b0 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -21,26 +21,23 @@ namespace llvm { class FunctionPass; - class ModulePass; - class TargetMachine; - class MachineInstr; - class HexagonMCInst; class HexagonAsmPrinter; class HexagonTargetMachine; + class MachineInstr; + class MCInst; + class ModulePass; class raw_ostream; + class TargetMachine; FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM); FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM); FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); - FunctionPass *createHexagonCFGOptimizer(const HexagonTargetMachine &TM); + FunctionPass *createHexagonCFGOptimizer(); - FunctionPass *createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM); - FunctionPass *createHexagonSplitConst32AndConst64( - const HexagonTargetMachine &TM); - FunctionPass *createHexagonExpandPredSpillCode( - const HexagonTargetMachine &TM); + FunctionPass *createHexagonSplitConst32AndConst64(); + FunctionPass *createHexagonExpandPredSpillCode(); FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonFixupHwLoops(); @@ -58,7 +55,7 @@ namespace llvm { TargetAsmBackend *createHexagonAsmBackend(const Target &, const std::string &); */ - void HexagonLowerToMC(const MachineInstr *MI, HexagonMCInst &MCI, + void HexagonLowerToMC(MachineInstr const *MI, MCInst &MCI, HexagonAsmPrinter &AP); } // end namespace llvm; diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index 2068c6da4e48..53a687c337ec 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -21,31 +21,17 @@ include "llvm/Target/Target.td" // Hexagon Subtarget features. 
//===----------------------------------------------------------------------===// -// Hexagon Archtectures -def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2", - "Hexagon v2">; -def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3", - "Hexagon v3">; -def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4", - "Hexagon v4">; -def ArchV5 : SubtargetFeature<"v5", "HexagonArchVersion", "V5", - "Hexagon v5">; +// Hexagon Architectures +def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">; +def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">; //===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. //===----------------------------------------------------------------------===// -def HasV2T : Predicate<"Subtarget.hasV2TOps()">; -def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">; -def NoV2T : Predicate<"!Subtarget.hasV2TOps()">; -def HasV3T : Predicate<"Subtarget.hasV3TOps()">; -def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">; -def NoV3T : Predicate<"!Subtarget.hasV3TOps()">; -def HasV4T : Predicate<"Subtarget.hasV4TOps()">; -def NoV4T : Predicate<"!Subtarget.hasV4TOps()">; -def HasV5T : Predicate<"Subtarget.hasV5TOps()">; -def NoV5T : Predicate<"!Subtarget.hasV5TOps()">; -def UseMEMOP : Predicate<"Subtarget.useMemOps()">; -def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">; +def HasV5T : Predicate<"HST->hasV5TOps()">; +def NoV5T : Predicate<"!HST->hasV5TOps()">; +def UseMEMOP : Predicate<"HST->useMemOps()">; +def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; //===----------------------------------------------------------------------===// // Classes used for relation maps. @@ -182,14 +168,6 @@ def getRegForm : InstrMapping { let ValueCols = [["reg"]]; } -def getRegShlForm : InstrMapping { - let FilterClass = "ImmRegShl"; - let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; - let ColFields = ["InputType"]; - let KeyCol = ["imm"]; - let ValueCols = [["reg"]]; -} - //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions //===----------------------------------------------------------------------===// @@ -210,8 +188,10 @@ class Proc<string Name, SchedMachineModel Model, list<SubtargetFeature> Features> : ProcessorModel<Name, Model, Features>; -def : Proc<"hexagonv4", HexagonModelV4, [ArchV2, ArchV3, ArchV4]>; -def : Proc<"hexagonv5", HexagonModelV4, [ArchV2, ArchV3, ArchV4, ArchV5]>; +def : Proc<"hexagonv4", HexagonModelV4, + [ArchV4]>; +def : Proc<"hexagonv5", HexagonModelV4, + [ArchV4, ArchV5]>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 50f2eca63693..e9491baf29ef 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -19,7 +19,7 @@ #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" #include "MCTargetDesc/HexagonInstPrinter.h" -#include "MCTargetDesc/HexagonMCInst.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -61,6 +61,10 @@ static cl::opt<bool> AlignCalls( "hexagon-align-calls", cl::Hidden, cl::init(true), cl::desc("Insert falign after call instruction for Hexagon 
target")); +HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {} + void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); @@ -195,44 +199,29 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned Size = BundleMIs.size(); assert((Size + IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!"); for (unsigned Index = 0; Index < Size; Index++) { - HexagonMCInst MCI; + MCInst MCI; HexagonLowerToMC(BundleMIs[Index], MCI, *this); - HexagonMCInst::AppendImplicitOperands(MCI); - MCI.setPacketBegin(Index == 0); - MCI.setPacketEnd(Index == (Size - 1)); - EmitToStreamer(OutStreamer, MCI); + HexagonMCInstrInfo::AppendImplicitOperands(MCI); + HexagonMCInstrInfo::setPacketBegin(MCI, Index == 0); + HexagonMCInstrInfo::setPacketEnd(MCI, Index == (Size - 1)); + EmitToStreamer(*OutStreamer, MCI); } } else { - HexagonMCInst MCI; + MCInst MCI; HexagonLowerToMC(MI, MCI, *this); - HexagonMCInst::AppendImplicitOperands(MCI); + HexagonMCInstrInfo::AppendImplicitOperands(MCI); if (MI->getOpcode() == Hexagon::ENDLOOP0) { - MCI.setPacketBegin(true); - MCI.setPacketEnd(true); + HexagonMCInstrInfo::setPacketBegin(MCI, true); + HexagonMCInstrInfo::setPacketEnd(MCI, true); } - EmitToStreamer(OutStreamer, MCI); + EmitToStreamer(*OutStreamer, MCI); } return; } -static MCInstPrinter *createHexagonMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - if (SyntaxVariant == 0) - return(new HexagonInstPrinter(MAI, MII, MRI)); - else - return nullptr; -} - extern "C" void LLVMInitializeHexagonAsmPrinter() { RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget); - - TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, - createHexagonMCInstPrinter); } diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h index 5f4c162b0070..792fc8b7af3a 100755 --- a/lib/Target/Hexagon/HexagonAsmPrinter.h +++ b/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -25,9 +25,12 @@ namespace llvm { const HexagonSubtarget *Subtarget; public: - explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) { - Subtarget = &TM.getSubtarget<HexagonSubtarget>(); + explicit HexagonAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer); + + bool runOnMachineFunction(MachineFunction &Fn) override { + Subtarget = &Fn.getSubtarget<HexagonSubtarget>(); + return AsmPrinter::runOnMachineFunction(Fn); } const char *getPassName() const override { diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index 307adad095c8..703e691e612f 100644 --- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -37,15 +37,11 @@ namespace { class HexagonCFGOptimizer : public MachineFunctionPass { private: - const HexagonTargetMachine& QTM; - const HexagonSubtarget &QST; - void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*); public: static char ID; - HexagonCFGOptimizer(const HexagonTargetMachine& TM) - : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) { + HexagonCFGOptimizer() : MachineFunctionPass(ID) { initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry()); } @@ -72,7 +68,8 @@ static bool IsUnconditionalJump(int Opc) { void 
HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, MachineBasicBlock* NewTarget) { - const HexagonInstrInfo *QII = QTM.getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo *TII = + MI->getParent()->getParent()->getSubtarget().getInstrInfo(); int NewOpcode = 0; switch(MI->getOpcode()) { case Hexagon::J2_jumpt: @@ -95,13 +92,12 @@ HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, llvm_unreachable("Cannot handle this case"); } - MI->setDesc(QII->get(NewOpcode)); + MI->setDesc(TII->get(NewOpcode)); MI->getOperand(1).setMBB(NewTarget); } bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { - // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); MBBb != MBBe; ++MBBb) { @@ -248,6 +244,6 @@ void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) { CALL_ONCE_INITIALIZATION(initializePassOnce) } -FunctionPass *llvm::createHexagonCFGOptimizer(const HexagonTargetMachine &TM) { - return new HexagonCFGOptimizer(TM); +FunctionPass *llvm::createHexagonCFGOptimizer() { + return new HexagonCFGOptimizer(); } diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp deleted file mode 100644 index 8d78409aa01d..000000000000 --- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp +++ /dev/null @@ -1,206 +0,0 @@ -//===-- llvm/CallingConvLower.cpp - Calling Convention lowering -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the Hexagon_CCState class, used for lowering and -// implementing calling conventions. Adapted from the machine independent -// version of the class (CCState) but this handles calls to varargs functions -// -//===----------------------------------------------------------------------===// - -#include "HexagonCallingConvLower.h" -#include "Hexagon.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -using namespace llvm; - -Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg, - const TargetMachine &tm, - SmallVectorImpl<CCValAssign> &locs, - LLVMContext &c) - : CallingConv(CC), IsVarArg(isVarArg), TM(tm), Locs(locs), Context(c) { - // No stack is used. - StackOffset = 0; - - UsedRegs.resize( - (TM.getSubtargetImpl()->getRegisterInfo()->getNumRegs() + 31) / 32); -} - -// HandleByVal - Allocate a stack slot large enough to pass an argument by -// value. The size and alignment information of the argument is encoded in its -// parameter attribute. -void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - int MinSize, int MinAlign, - ISD::ArgFlagsTy ArgFlags) { - unsigned Align = ArgFlags.getByValAlign(); - unsigned Size = ArgFlags.getByValSize(); - if (MinSize > (int)Size) - Size = MinSize; - if (MinAlign > (int)Align) - Align = MinAlign; - unsigned Offset = AllocateStack(Size, Align); - - addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset, - LocVT.getSimpleVT(), LocInfo)); -} - -/// MarkAllocated - Mark a register and all of its aliases as allocated. 
-void Hexagon_CCState::MarkAllocated(unsigned Reg) { - const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo(); - for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) - UsedRegs[*AI/32] |= 1 << (*AI&31); -} - -/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, -/// incorporating info about the formals into this state. -void -Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> - &Ins, - Hexagon_CCAssignFn Fn, - unsigned SretValueInRegs) { - unsigned NumArgs = Ins.size(); - unsigned i = 0; - - // If the function returns a small struct in registers, skip - // over the first (dummy) argument. - if (SretValueInRegs != 0) { - ++i; - } - - - for (; i != NumArgs; ++i) { - EVT ArgVT = Ins[i].VT; - ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) { - dbgs() << "Formal argument #" << i << " has unhandled type " - << ArgVT.getEVTString() << "\n"; - abort(); - } - } -} - -/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, -/// incorporating info about the result values into this state. -void -Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, - Hexagon_CCAssignFn Fn, - unsigned SretValueInRegs) { - - // For Hexagon, Return small structures in registers. - if (SretValueInRegs != 0) { - if (SretValueInRegs <= 32) { - unsigned Reg = Hexagon::R0; - addLoc(CCValAssign::getReg(0, MVT::i32, Reg, MVT::i32, - CCValAssign::Full)); - return; - } - if (SretValueInRegs <= 64) { - unsigned Reg = Hexagon::D0; - addLoc(CCValAssign::getReg(0, MVT::i64, Reg, MVT::i64, - CCValAssign::Full)); - return; - } - } - - - // Determine which register each value should be copied into. - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT VT = Outs[i].VT; - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false)){ - dbgs() << "Return operand #" << i << " has unhandled type " - << VT.getEVTString() << "\n"; - abort(); - } - } -} - - -/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info -/// about the passed values into this state. -void -Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> - &Outs, - Hexagon_CCAssignFn Fn, - int NonVarArgsParams, - unsigned SretValueSize) { - unsigned NumOps = Outs.size(); - - unsigned i = 0; - // If the called function returns a small struct in registers, skip - // the first actual parameter. We do not want to pass a pointer to - // the stack location. - if (SretValueSize != 0) { - ++i; - } - - for (; i != NumOps; ++i) { - EVT ArgVT = Outs[i].VT; - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, - NonVarArgsParams, i+1, false)) { - dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString() << "\n"; - abort(); - } - } -} - -/// AnalyzeCallOperands - Same as above except it takes vectors of types -/// and argument flags. 
-void -Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, - SmallVectorImpl<ISD::ArgFlagsTy> &Flags, - Hexagon_CCAssignFn Fn) { - unsigned NumOps = ArgVTs.size(); - for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = ArgVTs[i]; - ISD::ArgFlagsTy ArgFlags = Flags[i]; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1, - false)) { - dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString() << "\n"; - abort(); - } - } -} - -/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, -/// incorporating info about the passed values into this state. -void -Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, - Hexagon_CCAssignFn Fn, - unsigned SretValueInRegs) { - - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - EVT VT = Ins[i].VT; - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this, -1, -1, false)) { - dbgs() << "Call result #" << i << " has unhandled type " - << VT.getEVTString() << "\n"; - abort(); - } - } -} - -/// AnalyzeCallResult - Same as above except it's specialized for calls which -/// produce a single value. -void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) { - if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this, -1, -1, - false)) { - dbgs() << "Call result has unhandled type " - << VT.getEVTString() << "\n"; - abort(); - } -} diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h deleted file mode 100644 index 738ed1a52a09..000000000000 --- a/lib/Target/Hexagon/HexagonCallingConvLower.h +++ /dev/null @@ -1,187 +0,0 @@ -//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the Hexagon_CCState class, used for lowering -// and implementing calling conventions. Adapted from the target independent -// version but this handles calls to varargs functions -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONCALLINGCONVLOWER_H -#define LLVM_LIB_TARGET_HEXAGON_HEXAGONCALLINGCONVLOWER_H - -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" - -// -// Need to handle varargs. -// -namespace llvm { - class TargetRegisterInfo; - class TargetMachine; - class Hexagon_CCState; - class SDNode; - struct EVT; - -/// Hexagon_CCAssignFn - This function assigns a location for Val, updating -/// State to reflect the change. -typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State, - int NonVarArgsParams, - int CurrentParam, - bool ForceMem); - - -/// CCState - This class holds information needed while lowering arguments and -/// return values. It captures which registers are already assigned and which -/// stack slots are used. It provides accessors to allocate these values. 
-class Hexagon_CCState { - CallingConv::ID CallingConv; - bool IsVarArg; - const TargetMachine &TM; - SmallVectorImpl<CCValAssign> &Locs; - LLVMContext &Context; - - unsigned StackOffset; - SmallVector<uint32_t, 16> UsedRegs; -public: - Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM, - SmallVectorImpl<CCValAssign> &locs, LLVMContext &c); - - void addLoc(const CCValAssign &V) { - Locs.push_back(V); - } - - LLVMContext &getContext() const { return Context; } - const TargetMachine &getTarget() const { return TM; } - unsigned getCallingConv() const { return CallingConv; } - bool isVarArg() const { return IsVarArg; } - - unsigned getNextStackOffset() const { return StackOffset; } - - /// isAllocated - Return true if the specified register (or an alias) is - /// allocated. - bool isAllocated(unsigned Reg) const { - return UsedRegs[Reg/32] & (1 << (Reg&31)); - } - - /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, - /// incorporating info about the formals into this state. - void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, - Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); - - /// AnalyzeReturn - Analyze the returned values of an ISD::RET node, - /// incorporating info about the result values into this state. - void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, - Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); - - /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info - /// about the passed values into this state. - void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, - Hexagon_CCAssignFn Fn, int NonVarArgsParams, - unsigned SretValueSize); - - /// AnalyzeCallOperands - Same as above except it takes vectors of types - /// and argument flags. - void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, - SmallVectorImpl<ISD::ArgFlagsTy> &Flags, - Hexagon_CCAssignFn Fn); - - /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, - /// incorporating info about the passed values into this state. - void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, - Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); - - /// AnalyzeCallResult - Same as above except it's specialized for calls which - /// produce a single value. - void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn); - - /// getFirstUnallocated - Return the first unallocated register in the set, or - /// NumRegs if they are all allocated. - unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const { - for (unsigned i = 0; i != NumRegs; ++i) - if (!isAllocated(Regs[i])) - return i; - return NumRegs; - } - - /// AllocateReg - Attempt to allocate one register. If it is not available, - /// return zero. Otherwise, return the register, marking it and any aliases - /// as allocated. - unsigned AllocateReg(unsigned Reg) { - if (isAllocated(Reg)) return 0; - MarkAllocated(Reg); - return Reg; - } - - /// Version of AllocateReg with extra register to be shadowed. - unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) { - if (isAllocated(Reg)) return 0; - MarkAllocated(Reg); - MarkAllocated(ShadowReg); - return Reg; - } - - /// AllocateReg - Attempt to allocate one of the specified registers. If none - /// are available, return zero. Otherwise, return the first one available, - /// marking it and any aliases as allocated. 
- unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) { - unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs); - if (FirstUnalloc == NumRegs) - return 0; // Didn't find the reg. - - // Mark the register and any aliases as allocated. - unsigned Reg = Regs[FirstUnalloc]; - MarkAllocated(Reg); - return Reg; - } - - /// Version of AllocateReg with list of registers to be shadowed. - unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs, - unsigned NumRegs) { - unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs); - if (FirstUnalloc == NumRegs) - return 0; // Didn't find the reg. - - // Mark the register and any aliases as allocated. - unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc]; - MarkAllocated(Reg); - MarkAllocated(ShadowReg); - return Reg; - } - - /// AllocateStack - Allocate a chunk of stack space with the specified size - /// and alignment. - unsigned AllocateStack(unsigned Size, unsigned Align) { - assert(Align && ((Align-1) & Align) == 0); // Align is power of 2. - StackOffset = ((StackOffset + Align-1) & ~(Align-1)); - unsigned Result = StackOffset; - StackOffset += Size; - return Result; - } - - // HandleByVal - Allocate a stack slot large enough to pass an argument by - // value. The size and alignment information of the argument is encoded in its - // parameter attribute. - void HandleByVal(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags); - -private: - /// MarkAllocated - Mark a register and all of its aliases as allocated. - void MarkAllocated(unsigned Reg); -}; - - - -} // end namespace llvm - -#endif diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 1883ad8f2e51..1d6455c66fa5 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -116,38 +116,34 @@ static bool isCombinableInstType(MachineInstr *MI, switch(MI->getOpcode()) { case Hexagon::A2_tfr: { // A COPY instruction can be combined if its arguments are IntRegs (32bit). - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isReg()); + const MachineOperand &Op0 = MI->getOperand(0); + const MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg() && Op1.isReg()); - unsigned DestReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DestReg = Op0.getReg(); + unsigned SrcReg = Op1.getReg(); return Hexagon::IntRegsRegClass.contains(DestReg) && - Hexagon::IntRegsRegClass.contains(SrcReg); + Hexagon::IntRegsRegClass.contains(SrcReg); } case Hexagon::A2_tfrsi: { // A transfer-immediate can be combined if its argument is a signed 8bit // value. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - unsigned DestReg = MI->getOperand(0).getReg(); - - // Only combine constant extended TFRI if we are in aggressive mode. - return Hexagon::IntRegsRegClass.contains(DestReg) && - (ShouldCombineAggressively || isInt<8>(MI->getOperand(1).getImm())); - } - - case Hexagon::TFRI_V4: { - if (!ShouldCombineAggressively) - return false; - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isGlobal()); + const MachineOperand &Op0 = MI->getOperand(0); + const MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg()); + unsigned DestReg = Op0.getReg(); // Ensure that TargetFlags are MO_NO_FLAG for a global. 
This is a // workaround for an ABI bug that prevents GOT relocations on combine // instructions - if (MI->getOperand(1).getTargetFlags() != HexagonII::MO_NO_FLAG) + if (!Op1.isImm() && Op1.getTargetFlags() != HexagonII::MO_NO_FLAG) return false; - unsigned DestReg = MI->getOperand(0).getReg(); - return Hexagon::IntRegsRegClass.contains(DestReg); + // Only combine constant extended A2_tfrsi if we are in aggressive mode. + bool NotExt = Op1.isImm() && isInt<8>(Op1.getImm()); + return Hexagon::IntRegsRegClass.contains(DestReg) && + (ShouldCombineAggressively || NotExt); } default: @@ -157,13 +153,14 @@ static bool isCombinableInstType(MachineInstr *MI, return false; } -static bool isGreaterThan8BitTFRI(MachineInstr *I) { - return I->getOpcode() == Hexagon::A2_tfrsi && - !isInt<8>(I->getOperand(1).getImm()); -} -static bool isGreaterThan6BitTFRI(MachineInstr *I) { - return I->getOpcode() == Hexagon::A2_tfrsi && - !isUInt<6>(I->getOperand(1).getImm()); +template <unsigned N> +static bool isGreaterThanNBitTFRI(const MachineInstr *I) { + if (I->getOpcode() == Hexagon::TFRI64_V4 || + I->getOpcode() == Hexagon::A2_tfrsi) { + const MachineOperand &Op = I->getOperand(1); + return !Op.isImm() || !isInt<N>(Op.getImm()); + } + return false; } /// areCombinableOperations - Returns true if the two instruction can be merge @@ -171,31 +168,17 @@ static bool isGreaterThan6BitTFRI(MachineInstr *I) { static bool areCombinableOperations(const TargetRegisterInfo *TRI, MachineInstr *HighRegInst, MachineInstr *LowRegInst) { - assert((HighRegInst->getOpcode() == Hexagon::A2_tfr || - HighRegInst->getOpcode() == Hexagon::A2_tfrsi || - HighRegInst->getOpcode() == Hexagon::TFRI_V4) && - (LowRegInst->getOpcode() == Hexagon::A2_tfr || - LowRegInst->getOpcode() == Hexagon::A2_tfrsi || - LowRegInst->getOpcode() == Hexagon::TFRI_V4) && + unsigned HiOpc = HighRegInst->getOpcode(); + unsigned LoOpc = LowRegInst->getOpcode(); + (void)HiOpc; // Fix compiler warning + (void)LoOpc; // Fix compiler warning + assert((HiOpc == Hexagon::A2_tfr || HiOpc == Hexagon::A2_tfrsi) && + (LoOpc == Hexagon::A2_tfr || LoOpc == Hexagon::A2_tfrsi) && "Assume individual instructions are of a combinable type"); - const HexagonRegisterInfo *QRI = - static_cast<const HexagonRegisterInfo *>(TRI); - - // V4 added some combine variations (mixed immediate and register source - // operands), if we are on < V4 we can only combine 2 register-to-register - // moves and 2 immediate-to-register moves. We also don't have - // constant-extenders. - if (!QRI->Subtarget.hasV4TOps()) - return HighRegInst->getOpcode() == LowRegInst->getOpcode() && - !isGreaterThan8BitTFRI(HighRegInst) && - !isGreaterThan6BitTFRI(LowRegInst); - // There is no combine of two constant extended values. 
- if ((HighRegInst->getOpcode() == Hexagon::TFRI_V4 || - isGreaterThan8BitTFRI(HighRegInst)) && - (LowRegInst->getOpcode() == Hexagon::TFRI_V4 || - isGreaterThan6BitTFRI(LowRegInst))) + if (isGreaterThanNBitTFRI<8>(HighRegInst) && + isGreaterThanNBitTFRI<6>(LowRegInst)) return false; return true; @@ -222,10 +205,14 @@ static bool isUnsafeToMoveAcross(MachineInstr *I, unsigned UseReg, unsigned DestReg, const TargetRegisterInfo *TRI) { return (UseReg && (I->modifiesRegister(UseReg, TRI))) || - I->modifiesRegister(DestReg, TRI) || - I->readsRegister(DestReg, TRI) || - I->hasUnmodeledSideEffects() || - I->isInlineAsm() || I->isDebugValue(); + I->modifiesRegister(DestReg, TRI) || + I->readsRegister(DestReg, TRI) || + I->hasUnmodeledSideEffects() || + I->isInlineAsm() || I->isDebugValue(); +} + +static unsigned UseReg(const MachineOperand& MO) { + return MO.isReg() ? MO.getReg() : 0; } /// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2 such @@ -235,9 +222,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, unsigned I1DestReg, unsigned I2DestReg, bool &DoInsertAtI1) { - - bool IsImmUseReg = I2->getOperand(1).isImm() || I2->getOperand(1).isGlobal(); - unsigned I2UseReg = IsImmUseReg ? 0 : I2->getOperand(1).getReg(); + unsigned I2UseReg = UseReg(I2->getOperand(1)); // It is not safe to move I1 and I2 into one combine if I2 has a true // dependence on I1. @@ -301,8 +286,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, // At O3 we got better results (dhrystone) by being more conservative here. if (!ShouldCombineAggressively) End = std::next(MachineBasicBlock::iterator(I2)); - IsImmUseReg = I1->getOperand(1).isImm() || I1->getOperand(1).isGlobal(); - unsigned I1UseReg = IsImmUseReg ? 0 : I1->getOperand(1).getReg(); + unsigned I1UseReg = UseReg(I1->getOperand(1)); // Track killed operands. If we move across an instruction that kills our // operand, we need to update the kill information on the moved I1. It kills // the operand now. @@ -418,7 +402,7 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { // Get target info. TRI = MF.getSubtarget().getRegisterInfo(); - TII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); + TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); // Combine aggressively (for code size) ShouldCombineAggressively = @@ -561,7 +545,7 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, DebugLoc DL = InsertPt->getDebugLoc(); MachineBasicBlock *BB = InsertPt->getParent(); - // Handle globals. + // Handle globals. if (HiOperand.isGlobal()) { BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), @@ -577,17 +561,64 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, return; } - // Handle constant extended immediates. - if (!isInt<8>(HiOperand.getImm())) { - assert(isInt<8>(LoOperand.getImm())); + // Handle block addresses. 
+ if (HiOperand.isBlockAddress()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addBlockAddress(HiOperand.getBlockAddress(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isBlockAddress()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Handle jump tables. + if (HiOperand.isJTI()) { BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addJumpTableIndex(HiOperand.getIndex(), HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isJTI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) .addImm(HiOperand.getImm()) + .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags()); + return; + } + + // Handle constant pools. + if (HiOperand.isCPI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addConstantPoolIndex(HiOperand.getIndex(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) .addImm(LoOperand.getImm()); return; } + if (LoOperand.isCPI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // First preference should be given to Hexagon::A2_combineii instruction + // as it can include U6 (in Hexagon::A4_combineii) as well. + // In this instruction, HiOperand is const extended, if required. + if (isInt<8>(LoOperand.getImm())) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } - if (!isUInt<6>(LoOperand.getImm())) { - assert(isInt<8>(HiOperand.getImm())); + // In this instruction, LoOperand is const extended, if required. + if (isInt<8>(HiOperand.getImm())) { BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) .addImm(HiOperand.getImm()) .addImm(LoOperand.getImm()); @@ -611,7 +642,7 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt, DebugLoc DL = InsertPt->getDebugLoc(); MachineBasicBlock *BB = InsertPt->getParent(); - // Handle global. + // Handle globals. if (HiOperand.isGlobal()) { BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), @@ -619,6 +650,29 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt, .addReg(LoReg, LoRegKillFlag); return; } + // Handle block addresses. + if (HiOperand.isBlockAddress()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addBlockAddress(HiOperand.getBlockAddress(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Handle jump tables. + if (HiOperand.isJTI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addJumpTableIndex(HiOperand.getIndex(), HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Handle constant pools. 
+ if (HiOperand.isCPI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addConstantPoolIndex(HiOperand.getIndex(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } // Insert new combine instruction. // DoubleRegDest = combine #HiImm, LoReg BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) @@ -644,6 +698,29 @@ void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt, LoOperand.getTargetFlags()); return; } + // Handle block addresses. + if (LoOperand.isBlockAddress()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + // Handle jump tables. + if (LoOperand.isJTI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiOperand.getReg(), HiRegKillFlag) + .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags()); + return; + } + // Handle constant pools. + if (LoOperand.isCPI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiOperand.getReg(), HiRegKillFlag) + .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } // Insert new combine instruction. // DoubleRegDest = combine HiReg, #LoImm diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp new file mode 100644 index 000000000000..37ed173a79cd --- /dev/null +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -0,0 +1,1348 @@ +// Replace mux instructions with the corresponding legal instructions. +// It is meant to work post-SSA, but still on virtual registers. It was +// originally placed between register coalescing and machine instruction +// scheduler. +// In this place in the optimization sequence, live interval analysis had +// been performed, and the live intervals should be preserved. A large part +// of the code deals with preserving the liveness information. +// +// Liveness tracking aside, the main functionality of this pass is divided +// into two steps. The first step is to replace an instruction +// vreg0 = C2_mux vreg0, vreg1, vreg2 +// with a pair of conditional transfers +// vreg0 = A2_tfrt vreg0, vreg1 +// vreg0 = A2_tfrf vreg0, vreg2 +// It is the intention that the execution of this pass could be terminated +// after this step, and the code generated would be functionally correct. +// +// If the uses of the source values vreg1 and vreg2 are kills, and their +// definitions are predicable, then in the second step, the conditional +// transfers will then be rewritten as predicated instructions. E.g. +// vreg0 = A2_or vreg1, vreg2 +// vreg3 = A2_tfrt vreg99, vreg0<kill> +// will be rewritten as +// vreg3 = A2_port vreg99, vreg1, vreg2 +// +// This replacement has two variants: "up" and "down". Consider this case: +// vreg0 = A2_or vreg1, vreg2 +// ... [intervening instructions] ... +// vreg3 = A2_tfrt vreg99, vreg0<kill> +// variant "up": +// vreg3 = A2_port vreg99, vreg1, vreg2 +// ... [intervening instructions, vreg0->vreg3] ... +// [deleted] +// variant "down": +// [deleted] +// ... [intervening instructions] ... +// vreg3 = A2_port vreg99, vreg1, vreg2 +// +// Both, one or none of these variants may be valid, and checks are made +// to rule out inapplicable variants. 
+// +// As an additional optimization, before either of the two steps above is +// executed, the pass attempts to coalesce the target register with one of +// the source registers, e.g. given an instruction +// vreg3 = C2_mux vreg0, vreg1, vreg2 +// vreg3 will be coalesced with either vreg1 or vreg2. If this succeeds, +// the instruction would then be (for example) +// vreg3 = C2_mux vreg0, vreg3, vreg2 +// and, under certain circumstances, this could result in only one predicated +// instruction: +// vreg3 = A2_tfrf vreg0, vreg2 +// + +#define DEBUG_TYPE "expand-condsets" +#include "HexagonTargetMachine.h" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit", + cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions")); +static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit", + cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings")); + +namespace llvm { + void initializeHexagonExpandCondsetsPass(PassRegistry&); + FunctionPass *createHexagonExpandCondsets(); +} + +namespace { + class HexagonExpandCondsets : public MachineFunctionPass { + public: + static char ID; + HexagonExpandCondsets() : + MachineFunctionPass(ID), HII(0), TRI(0), MRI(0), + LIS(0), CoaLimitActive(false), + TfrLimitActive(false), CoaCounter(0), TfrCounter(0) { + if (OptCoaLimit.getPosition()) + CoaLimitActive = true, CoaLimit = OptCoaLimit; + if (OptTfrLimit.getPosition()) + TfrLimitActive = true, TfrLimit = OptTfrLimit; + initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); + } + + virtual const char *getPassName() const { + return "Hexagon Expand Condsets"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + const HexagonInstrInfo *HII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + LiveIntervals *LIS; + + bool CoaLimitActive, TfrLimitActive; + unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter; + + struct RegisterRef { + RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()), + Sub(Op.getSubReg()) {} + RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {} + bool operator== (RegisterRef RR) const { + return Reg == RR.Reg && Sub == RR.Sub; + } + bool operator!= (RegisterRef RR) const { return !operator==(RR); } + unsigned Reg, Sub; + }; + + typedef DenseMap<unsigned,unsigned> ReferenceMap; + enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) }; + enum { Exec_Then = 0x10, Exec_Else = 0x20 }; + unsigned getMaskForSub(unsigned Sub); + bool isCondset(const MachineInstr *MI); + + void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec); + bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec); + + LiveInterval::iterator nextSegment(LiveInterval &LI, SlotIndex S); + LiveInterval::iterator prevSegment(LiveInterval &LI, SlotIndex S); + void 
makeDefined(unsigned Reg, SlotIndex S, bool SetDef); + void makeUndead(unsigned Reg, SlotIndex S); + void shrinkToUses(unsigned Reg, LiveInterval &LI); + void updateKillFlags(unsigned Reg, LiveInterval &LI); + void terminateSegment(LiveInterval::iterator LT, SlotIndex S, + LiveInterval &LI); + void addInstrToLiveness(MachineInstr *MI); + void removeInstrFromLiveness(MachineInstr *MI); + + unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond); + MachineInstr *genTfrFor(MachineOperand &SrcOp, unsigned DstR, + unsigned DstSR, const MachineOperand &PredOp, bool Cond); + bool split(MachineInstr *MI); + bool splitInBlock(MachineBasicBlock &B); + + bool isPredicable(MachineInstr *MI); + MachineInstr *getReachingDefForPred(RegisterRef RD, + MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond); + bool canMoveOver(MachineInstr *MI, ReferenceMap &Defs, ReferenceMap &Uses); + bool canMoveMemTo(MachineInstr *MI, MachineInstr *ToI, bool IsDown); + void predicateAt(RegisterRef RD, MachineInstr *MI, + MachineBasicBlock::iterator Where, unsigned PredR, bool Cond); + void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR, + bool Cond, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Last); + bool predicate(MachineInstr *TfrI, bool Cond); + bool predicateInBlock(MachineBasicBlock &B); + + void postprocessUndefImplicitUses(MachineBasicBlock &B); + void removeImplicitUses(MachineInstr *MI); + void removeImplicitUses(MachineBasicBlock &B); + + bool isIntReg(RegisterRef RR, unsigned &BW); + bool isIntraBlocks(LiveInterval &LI); + bool coalesceRegisters(RegisterRef R1, RegisterRef R2); + bool coalesceSegments(MachineFunction &MF); + }; +} + +char HexagonExpandCondsets::ID = 0; + + +unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { + switch (Sub) { + case Hexagon::subreg_loreg: + return Sub_Low; + case Hexagon::subreg_hireg: + return Sub_High; + case Hexagon::NoSubRegister: + return Sub_None; + } + llvm_unreachable("Invalid subregister"); +} + + +bool HexagonExpandCondsets::isCondset(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::C2_mux: + case Hexagon::C2_muxii: + case Hexagon::C2_muxir: + case Hexagon::C2_muxri: + case Hexagon::MUX64_rr: + return true; + break; + } + return false; +} + + +void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, + unsigned Exec) { + unsigned Mask = getMaskForSub(RR.Sub) | Exec; + ReferenceMap::iterator F = Map.find(RR.Reg); + if (F == Map.end()) + Map.insert(std::make_pair(RR.Reg, Mask)); + else + F->second |= Mask; +} + + +bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, + unsigned Exec) { + ReferenceMap::iterator F = Map.find(RR.Reg); + if (F == Map.end()) + return false; + unsigned Mask = getMaskForSub(RR.Sub) | Exec; + if (Mask & F->second) + return true; + return false; +} + + +LiveInterval::iterator HexagonExpandCondsets::nextSegment(LiveInterval &LI, + SlotIndex S) { + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (I->start >= S) + return I; + } + return LI.end(); +} + + +LiveInterval::iterator HexagonExpandCondsets::prevSegment(LiveInterval &LI, + SlotIndex S) { + LiveInterval::iterator P = LI.end(); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (I->end > S) + return P; + P = I; + } + return P; +} + + +/// Find the implicit use of register Reg in slot index S, and make sure +/// that the "defined" flag is set to SetDef. 
While the mux expansion is +/// going on, predicated instructions will have implicit uses of the +/// registers that are being defined. This is to keep any preceding +/// definitions live. If there is no preceding definition, the implicit +/// use will be marked as "undef", otherwise it will be "defined". This +/// function is used to update the flag. +void HexagonExpandCondsets::makeDefined(unsigned Reg, SlotIndex S, + bool SetDef) { + if (!S.isRegister()) + return; + MachineInstr *MI = LIS->getInstructionFromIndex(S); + assert(MI && "Expecting instruction"); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + continue; + bool IsDef = !Op.isUndef(); + if (Op.isImplicit() && IsDef != SetDef) + Op.setIsUndef(!SetDef); + } +} + + +void HexagonExpandCondsets::makeUndead(unsigned Reg, SlotIndex S) { + // If S is a block boundary, then there can still be a dead def reaching + // this point. Instead of traversing the CFG, queue start points of all + // live segments that begin with a register, and end at a block boundary. + // This may "resurrect" some truly dead definitions, but doing so is + // harmless. + SmallVector<MachineInstr*,8> Defs; + if (S.isBlock()) { + LiveInterval &LI = LIS->getInterval(Reg); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->start.isRegister() || !I->end.isBlock()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(I->start); + Defs.push_back(MI); + } + } else if (S.isRegister()) { + MachineInstr *MI = LIS->getInstructionFromIndex(S); + Defs.push_back(MI); + } + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + MachineInstr *MI = Defs[i]; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + Op.setIsDead(false); + } + } +} + + +/// Shrink the segments in the live interval for a given register to the last +/// use before each subsequent def. Unlike LiveIntervals::shrinkToUses, this +/// function will not mark any definitions of Reg as dead. The reason for this +/// is that this function is used while a MUX instruction is being expanded, +/// or while a conditional copy is undergoing predication. During these +/// processes, there may be defs present in the instruction sequence that have +/// not yet been removed, or there may be missing uses that have not yet been +/// added. We want to utilize LiveIntervals::shrinkToUses as much as possible, +/// but since it does not extend any intervals that are too short, we need to +/// pre-emptively extend them here in anticipation of further changes. +void HexagonExpandCondsets::shrinkToUses(unsigned Reg, LiveInterval &LI) { + SmallVector<MachineInstr*,4> Deads; + LIS->shrinkToUses(&LI, &Deads); + // Need to undo the deadification made by "shrinkToUses". It's easier to + // do it here, since we have a list of all instructions that were just + // marked as dead. + for (unsigned i = 0, n = Deads.size(); i < n; ++i) { + MachineInstr *MI = Deads[i]; + // Clear the "dead" flag. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + Op.setIsDead(false); + } + // Extend the live segment to the beginning of the next one. 
+ LiveInterval::iterator End = LI.end(); + SlotIndex S = LIS->getInstructionIndex(MI).getRegSlot(); + LiveInterval::iterator T = LI.FindSegmentContaining(S); + assert(T != End); + LiveInterval::iterator N = std::next(T); + if (N != End) + T->end = N->start; + else + T->end = LIS->getMBBEndIdx(MI->getParent()); + } + updateKillFlags(Reg, LI); +} + + +/// Given an updated live interval LI for register Reg, update the kill flags +/// in instructions using Reg to reflect the liveness changes. +void HexagonExpandCondsets::updateKillFlags(unsigned Reg, LiveInterval &LI) { + MRI->clearKillFlags(Reg); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + SlotIndex EX = I->end; + if (!EX.isRegister()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(EX); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + continue; + // Only set the kill flag on the first encountered use of Reg in this + // instruction. + Op.setIsKill(true); + break; + } + } +} + + +/// When adding a new instruction to liveness, the newly added definition +/// will start a new live segment. This may happen at a position that falls +/// within an existing live segment. In such case that live segment needs to +/// be truncated to make room for the new segment. Ultimately, the truncation +/// will occur at the last use, but for now the segment can be terminated +/// right at the place where the new segment will start. The segments will be +/// shrunk-to-uses later. +void HexagonExpandCondsets::terminateSegment(LiveInterval::iterator LT, + SlotIndex S, LiveInterval &LI) { + // Terminate the live segment pointed to by LT within a live interval LI. + if (LT == LI.end()) + return; + + VNInfo *OldVN = LT->valno; + SlotIndex EX = LT->end; + LT->end = S; + // If LT does not end at a block boundary, the termination is done. + if (!EX.isBlock()) + return; + + // If LT ended at a block boundary, it's possible that its value number + // is picked up at the beginning other blocks. Create a new value number + // and change such blocks to use it instead. + VNInfo *NewVN = 0; + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->start.isBlock() || I->valno != OldVN) + continue; + // Generate on-demand a new value number that is defined by the + // block beginning (i.e. -phi). + if (!NewVN) + NewVN = LI.getNextValue(I->start, LIS->getVNInfoAllocator()); + I->valno = NewVN; + } +} + + +/// Add the specified instruction to live intervals. This function is used +/// to update the live intervals while the program code is being changed. +/// Neither the expansion of a MUX, nor the predication are atomic, and this +/// function is used to update the live intervals while these transformations +/// are being done. +void HexagonExpandCondsets::addInstrToLiveness(MachineInstr *MI) { + SlotIndex MX = LIS->isNotInMIMap(MI) ? LIS->InsertMachineInstrInMaps(MI) + : LIS->getInstructionIndex(MI); + DEBUG(dbgs() << "adding liveness info for instr\n " << MX << " " << *MI); + + MX = MX.getRegSlot(); + bool Predicated = HII->isPredicated(MI); + MachineBasicBlock *MB = MI->getParent(); + + // Strip all implicit uses from predicated instructions. They will be + // added again, according to the updated information. + if (Predicated) + removeImplicitUses(MI); + + // For each def in MI we need to insert a new live segment starting at MX + // into the interval. If there already exists a live segment in the interval + // that contains MX, we need to terminate it at MX. 
+  SmallVector<RegisterRef,2> Defs;
+  for (auto &Op : MI->operands())
+    if (Op.isReg() && Op.isDef())
+      Defs.push_back(RegisterRef(Op));
+
+  for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+    unsigned DefR = Defs[i].Reg;
+    LiveInterval &LID = LIS->getInterval(DefR);
+    DEBUG(dbgs() << "adding def " << PrintReg(DefR, TRI)
+                 << " with interval\n " << LID << "\n");
+    // If MX falls inside of an existing live segment, terminate it.
+    LiveInterval::iterator LT = LID.FindSegmentContaining(MX);
+    if (LT != LID.end())
+      terminateSegment(LT, MX, LID);
+    DEBUG(dbgs() << "after terminating segment\n " << LID << "\n");
+
+    // Create a new segment starting from MX.
+    LiveInterval::iterator P = prevSegment(LID, MX), N = nextSegment(LID, MX);
+    SlotIndex EX;
+    VNInfo *VN = LID.getNextValue(MX, LIS->getVNInfoAllocator());
+    if (N == LID.end()) {
+      // There is no live segment after MX. End this segment at the end of
+      // the block.
+      EX = LIS->getMBBEndIdx(MB);
+    } else {
+      // If the next segment starts at the block boundary, end the new segment
+      // at the boundary of the preceding block (i.e. the previous index).
+      // Otherwise, end the segment at the beginning of the next segment. In
+      // either case it will be "shrunk-to-uses" later.
+      EX = N->start.isBlock() ? N->start.getPrevIndex() : N->start;
+    }
+    if (Predicated) {
+      // A predicated instruction will have an implicit use of the defined
+      // register. This is necessary so that this definition will not make
+      // any previous definitions dead. If there are no previous live
+      // segments, still add the implicit use, but make it "undef".
+      // Because of the implicit use, the preceding definition is not
+      // dead. Mark it as such (if necessary).
+      MachineOperand ImpUse = MachineOperand::CreateReg(DefR, false, true);
+      ImpUse.setSubReg(Defs[i].Sub);
+      bool Undef = false;
+      if (P == LID.end())
+        Undef = true;
+      else {
+        // If the previous segment extends to the end of the previous block,
+        // the end index may actually be the beginning of this block. If
+        // the previous segment ends at a block boundary, move it back by one,
+        // to get the proper block for it.
+        SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end;
+        MachineBasicBlock *PB = LIS->getMBBFromIndex(PE);
+        if (PB != MB && !LIS->isLiveInToMBB(LID, MB))
+          Undef = true;
+      }
+      if (!Undef) {
+        makeUndead(DefR, P->valno->def);
+        // We are adding a live use, so extend the previous segment to
+        // include it.
+        P->end = MX;
+      } else {
+        ImpUse.setIsUndef(true);
+      }
+
+      if (!MI->readsRegister(DefR))
+        MI->addOperand(ImpUse);
+      if (N != LID.end())
+        makeDefined(DefR, N->start, true);
+    }
+    LiveRange::Segment NR = LiveRange::Segment(MX, EX, VN);
+    LID.addSegment(NR);
+    DEBUG(dbgs() << "added a new segment " << NR << "\n " << LID << "\n");
+    shrinkToUses(DefR, LID);
+    DEBUG(dbgs() << "updated imp-uses: " << *MI);
+    LID.verify();
+  }
+
+  // For each use in MI:
+  // - If there is no live segment that contains MX for the used register,
+  //   extend the previous one. Ignore implicit uses.
+  for (auto &Op : MI->operands()) {
+    if (!Op.isReg() || !Op.isUse() || Op.isImplicit() || Op.isUndef())
+      continue;
+    unsigned UseR = Op.getReg();
+    LiveInterval &LIU = LIS->getInterval(UseR);
+    // Find the last segment P that starts before MX.
+    LiveInterval::iterator P = LIU.FindSegmentContaining(MX);
+    if (P == LIU.end())
+      P = prevSegment(LIU, MX);
+
+    assert(P != LIU.end() && "MI uses undefined register?");
+    SlotIndex EX = P->end;
+    // If P contains MX, there is not much to do.
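An illustrative aside, not part of the patch: with MX at the use's register slot, the two cases handled next are:

//   Case 1: P = [D, E) with D < MX < E  -> P already covers MX; clear the
//           kill flag, since the register stays live past this use.
//   Case 2: P = [D, E) with E <= MX     -> extend P to MX.getNextIndex()
//           and move the kill flag to this use.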
+ if (EX > MX) { + Op.setIsKill(false); + continue; + } + // Otherwise, extend P to "next(MX)". + P->end = MX.getNextIndex(); + Op.setIsKill(true); + // Get the old "kill" instruction, and remove the kill flag. + if (MachineInstr *KI = LIS->getInstructionFromIndex(MX)) + KI->clearRegisterKills(UseR, nullptr); + shrinkToUses(UseR, LIU); + LIU.verify(); + } +} + + +/// Update the live interval information to reflect the removal of the given +/// instruction from the program. As with "addInstrToLiveness", this function +/// is called while the program code is being changed. +void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) { + SlotIndex MX = LIS->getInstructionIndex(MI).getRegSlot(); + DEBUG(dbgs() << "removing instr\n " << MX << " " << *MI); + + // For each def in MI: + // If MI starts a live segment, merge this segment with the previous segment. + // + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned DefR = Op.getReg(); + LiveInterval &LID = LIS->getInterval(DefR); + LiveInterval::iterator LT = LID.FindSegmentContaining(MX); + assert(LT != LID.end() && "Expecting live segments"); + DEBUG(dbgs() << "removing def at " << MX << " of " << PrintReg(DefR, TRI) + << " with interval\n " << LID << "\n"); + if (LT->start != MX) + continue; + + VNInfo *MVN = LT->valno; + if (LT != LID.begin()) { + // If the current live segment is not the first, the task is easy. If + // the previous segment continues into the current block, extend it to + // the end of the current one, and merge the value numbers. + // Otherwise, remove the current segment, and make the end of it "undef". + LiveInterval::iterator P = std::prev(LT); + SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end; + MachineBasicBlock *MB = MI->getParent(); + MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); + if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) { + makeDefined(DefR, LT->end, false); + LID.removeSegment(*LT); + } else { + // Make the segments adjacent, so that merge-vn can also merge the + // segments. + P->end = LT->start; + makeUndead(DefR, P->valno->def); + LID.MergeValueNumberInto(MVN, P->valno); + } + } else { + LiveInterval::iterator N = std::next(LT); + LiveInterval::iterator RmB = LT, RmE = N; + while (N != LID.end()) { + // Iterate until the first register-based definition is found + // (i.e. skip all block-boundary entries). + LiveInterval::iterator Next = std::next(N); + if (N->start.isRegister()) { + makeDefined(DefR, N->start, false); + break; + } + if (N->end.isRegister()) { + makeDefined(DefR, N->end, false); + RmE = Next; + break; + } + RmE = Next; + N = Next; + } + // Erase the segments in one shot to avoid invalidating iterators. + LID.segments.erase(RmB, RmE); + } + + bool VNUsed = false; + for (LiveInterval::iterator I = LID.begin(), E = LID.end(); I != E; ++I) { + if (I->valno != MVN) + continue; + VNUsed = true; + break; + } + if (!VNUsed) + MVN->markUnused(); + + DEBUG(dbgs() << "new interval: "); + if (!LID.empty()) { + DEBUG(dbgs() << LID << "\n"); + LID.verify(); + } else { + DEBUG(dbgs() << "<empty>\n"); + LIS->removeInterval(DefR); + } + } + + // For uses there is nothing to do. The intervals will be updated via + // shrinkToUses. 
+  SmallVector<unsigned,4> Uses;
+  for (auto &Op : MI->operands()) {
+    if (!Op.isReg() || !Op.isUse())
+      continue;
+    unsigned R = Op.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(R))
+      continue;
+    Uses.push_back(R);
+  }
+  LIS->RemoveMachineInstrFromMaps(MI);
+  MI->eraseFromParent();
+  for (unsigned i = 0, n = Uses.size(); i < n; ++i) {
+    LiveInterval &LI = LIS->getInterval(Uses[i]);
+    shrinkToUses(Uses[i], LI);
+  }
+}
+
+
+/// Get the opcode for a conditional transfer of the value in SO (source
+/// operand). The condition (true/false) is given in Cond.
+unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
+      bool Cond) {
+  using namespace Hexagon;
+  if (SO.isReg()) {
+    unsigned PhysR;
+    RegisterRef RS = SO;
+    if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) {
+      const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
+      assert(VC->begin() != VC->end() && "Empty register class");
+      PhysR = *VC->begin();
+    } else {
+      assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg));
+      PhysR = RS.Reg;
+    }
+    unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
+    switch (RC->getSize()) {
+      case 4:
+        return Cond ? A2_tfrt : A2_tfrf;
+      case 8:
+        return Cond ? A2_tfrpt : A2_tfrpf;
+    }
+    llvm_unreachable("Invalid register operand");
+  }
+  if (SO.isImm() || SO.isFPImm())
+    return Cond ? C2_cmoveit : C2_cmoveif;
+  llvm_unreachable("Unexpected source operand");
+}
+
+
+/// Generate a conditional transfer, copying the value SrcOp to the
+/// destination register DstR:DstSR, and using the predicate register from
+/// PredOp. The Cond argument specifies whether the predicate is to be
+/// if(PredOp), or if(!PredOp).
+MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp,
+      unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, bool Cond) {
+  MachineInstr *MI = SrcOp.getParent();
+  MachineBasicBlock &B = *MI->getParent();
+  MachineBasicBlock::iterator At = MI;
+  DebugLoc DL = MI->getDebugLoc();
+
+  // Don't avoid identity copies here (i.e. if the source and the destination
+  // are the same registers). It is actually better to generate them here,
+  // since this would cause the copy to potentially be predicated in the next
+  // step. The predication will remove such a copy if it is unable to
+  // predicate.
+
+  unsigned Opc = getCondTfrOpcode(SrcOp, Cond);
+  MachineInstr *TfrI = BuildMI(B, At, DL, HII->get(Opc))
+        .addReg(DstR, RegState::Define, DstSR)
+        .addOperand(PredOp)
+        .addOperand(SrcOp);
+  // We don't want any kills yet.
+  TfrI->clearKillInfo();
+  DEBUG(dbgs() << "created an initial copy: " << *TfrI);
+  return TfrI;
+}
+
+
+/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function
+/// performs all necessary changes to complete the replacement.
+bool HexagonExpandCondsets::split(MachineInstr *MI) {
+  if (TfrLimitActive) {
+    if (TfrCounter >= TfrLimit)
+      return false;
+    TfrCounter++;
+  }
+  DEBUG(dbgs() << "\nsplitting BB#" << MI->getParent()->getNumber()
+               << ": " << *MI);
+  MachineOperand &MD = MI->getOperand(0);  // Definition
+  MachineOperand &MP = MI->getOperand(1);  // Predicate register
+  assert(MD.isDef());
+  unsigned DR = MD.getReg(), DSR = MD.getSubReg();
+
+  // First, create the two individual conditional transfers, and add each
+  // of them to the live intervals information. Do that first and then remove
+  // the old instruction from live intervals.
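For concreteness, an illustrative sketch (not part of the patch; the vreg numbers are made up): split() rewrites a condset into two conditional transfers that write the same register under complementary predicates.

//   before:  %vreg0 = C2_mux %vreg1, %vreg2, %vreg3
//   after:   %vreg0 = A2_tfrt %vreg1, %vreg2   ; executes if %vreg1 is true
//            %vreg0 = A2_tfrf %vreg1, %vreg3   ; executes if %vreg1 is false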
+  if (MachineInstr *TfrT = genTfrFor(MI->getOperand(2), DR, DSR, MP, true))
+    addInstrToLiveness(TfrT);
+  if (MachineInstr *TfrF = genTfrFor(MI->getOperand(3), DR, DSR, MP, false))
+    addInstrToLiveness(TfrF);
+  removeInstrFromLiveness(MI);
+
+  return true;
+}
+
+
+/// Split all MUX instructions in the given block into pairs of conditional
+/// transfers.
+bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B) {
+  bool Changed = false;
+  MachineBasicBlock::iterator I, E, NextI;
+  for (I = B.begin(), E = B.end(); I != E; I = NextI) {
+    NextI = std::next(I);
+    if (isCondset(I))
+      Changed |= split(I);
+  }
+  return Changed;
+}
+
+
+bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
+  if (HII->isPredicated(MI) || !HII->isPredicable(MI))
+    return false;
+  if (MI->hasUnmodeledSideEffects() || MI->mayStore())
+    return false;
+  // Reject instructions with multiple defs (e.g. post-increment loads).
+  bool HasDef = false;
+  for (auto &Op : MI->operands()) {
+    if (!Op.isReg() || !Op.isDef())
+      continue;
+    if (HasDef)
+      return false;
+    HasDef = true;
+  }
+  for (auto &Mo : MI->memoperands())
+    if (Mo->isVolatile())
+      return false;
+  return true;
+}
+
+
+/// Find the reaching definition for a predicated use of RD. RD is used
+/// under the conditions given by PredR and Cond, and this function will
+/// ignore definitions that set RD under the opposite conditions.
+MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD,
+      MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) {
+  MachineBasicBlock &B = *UseIt->getParent();
+  MachineBasicBlock::iterator I = UseIt, S = B.begin();
+  if (I == S)
+    return 0;
+
+  bool PredValid = true;
+  do {
+    --I;
+    MachineInstr *MI = &*I;
+    // Check if this instruction can be ignored, i.e. if it is predicated
+    // on the complementary condition.
+    if (PredValid && HII->isPredicated(MI)) {
+      if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(MI)))
+        continue;
+    }
+
+    // Check the defs. If the PredR is defined, invalidate it. If RD is
+    // defined, return the instruction or 0, depending on the circumstances.
+    for (auto &Op : MI->operands()) {
+      if (!Op.isReg() || !Op.isDef())
+        continue;
+      RegisterRef RR = Op;
+      if (RR.Reg == PredR) {
+        PredValid = false;
+        continue;
+      }
+      if (RR.Reg != RD.Reg)
+        continue;
+      // If the "Reg" part agrees, there is still the subregister to check.
+      // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but
+      // not vreg1 (w/o subregisters).
+      if (RR.Sub == RD.Sub)
+        return MI;
+      if (RR.Sub == 0 || RD.Sub == 0)
+        return 0;
+      // We have different subregisters, so we can continue looking.
+    }
+  } while (I != S);
+
+  return 0;
+}
+
+
+/// Check if the instruction MI can be safely moved over a set of instructions
+/// whose side-effects (in terms of register defs and uses) are expressed in
+/// the maps Defs and Uses. These maps reflect the conditional defs and uses
+/// that depend on the same predicate register, to allow moving instructions
+/// over instructions predicated on the opposite condition.
+bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs,
+      ReferenceMap &Uses) {
+  // In order to be able to safely move MI over instructions that define
+  // "Defs" and use "Uses", no def operand from MI can be defined or used
+  // and no use operand can be defined.
+  for (auto &Op : MI->operands()) {
+    if (!Op.isReg())
+      continue;
+    RegisterRef RR = Op;
+    // For a physical register we would need to check register aliases, etc.
+ // and we don't want to bother with that. It would be of little value + // before the actual register rewriting (from virtual to physical). + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + // No redefs for any operand. + if (isRefInMap(RR, Defs, Exec_Then)) + return false; + // For defs, there cannot be uses. + if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then)) + return false; + } + return true; +} + + +/// Check if the instruction accessing memory (TheI) can be moved to the +/// location ToI. +bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI, + bool IsDown) { + bool IsLoad = TheI->mayLoad(), IsStore = TheI->mayStore(); + if (!IsLoad && !IsStore) + return true; + if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI)) + return true; + if (TheI->hasUnmodeledSideEffects()) + return false; + + MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI; + MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI; + bool Ordered = TheI->hasOrderedMemoryRef(); + + // Search for aliased memory reference in (StartI, EndI). + for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) { + MachineInstr *MI = &*I; + if (MI->hasUnmodeledSideEffects()) + return false; + bool L = MI->mayLoad(), S = MI->mayStore(); + if (!L && !S) + continue; + if (Ordered && MI->hasOrderedMemoryRef()) + return false; + + bool Conflict = (L && IsStore) || S; + if (Conflict) + return false; + } + return true; +} + + +/// Generate a predicated version of MI (where the condition is given via +/// PredR and Cond) at the point indicated by Where. +void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI, + MachineBasicBlock::iterator Where, unsigned PredR, bool Cond) { + // The problem with updating live intervals is that we can move one def + // past another def. In particular, this can happen when moving an A2_tfrt + // over an A2_tfrf defining the same register. From the point of view of + // live intervals, these two instructions are two separate definitions, + // and each one starts another live segment. LiveIntervals's "handleMove" + // does not allow such moves, so we need to handle it ourselves. To avoid + // invalidating liveness data while we are using it, the move will be + // implemented in 4 steps: (1) add a clone of the instruction MI at the + // target location, (2) update liveness, (3) delete the old instruction, + // and (4) update liveness again. + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction. + unsigned Opc = MI->getOpcode(); + unsigned PredOpc = HII->getCondOpcode(Opc, !Cond); + MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc)); + unsigned Ox = 0, NP = MI->getNumOperands(); + // Skip all defs from MI first. + while (Ox < NP) { + MachineOperand &MO = MI->getOperand(Ox); + if (!MO.isReg() || !MO.isDef()) + break; + Ox++; + } + // Add the new def, then the predicate register, then the rest of the + // operands. 
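An illustrative aside, not part of the patch; the opcode and registers below are hypothetical. Predicating an instruction this way keeps the def first and inserts the predicate right after it:

//   %vreg0 = L2_loadri_io %vreg1, 8
// becomes something like
//   %vreg0 = L2_ploadrit_io %vreg2, %vreg1, 8   ; %vreg2 holds the predicate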
+ MB.addReg(RD.Reg, RegState::Define, RD.Sub); + MB.addReg(PredR); + while (Ox < NP) { + MachineOperand &MO = MI->getOperand(Ox); + if (!MO.isReg() || !MO.isImplicit()) + MB.addOperand(MO); + Ox++; + } + + MachineFunction &MF = *B.getParent(); + MachineInstr::mmo_iterator I = MI->memoperands_begin(); + unsigned NR = std::distance(I, MI->memoperands_end()); + MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR); + for (unsigned i = 0; i < NR; ++i) + MemRefs[i] = *I++; + MB.setMemRefs(MemRefs, MemRefs+NR); + + MachineInstr *NewI = MB; + NewI->clearKillInfo(); + addInstrToLiveness(NewI); +} + + +/// In the range [First, Last], rename all references to the "old" register RO +/// to the "new" register RN, but only in instructions predicated on the given +/// condition. +void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, + unsigned PredR, bool Cond, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Last) { + MachineBasicBlock::iterator End = std::next(Last); + for (MachineBasicBlock::iterator I = First; I != End; ++I) { + MachineInstr *MI = &*I; + // Do not touch instructions that are not predicated, or are predicated + // on the opposite condition. + if (!HII->isPredicated(MI)) + continue; + if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(MI))) + continue; + + for (auto &Op : MI->operands()) { + if (!Op.isReg() || RO != RegisterRef(Op)) + continue; + Op.setReg(RN.Reg); + Op.setSubReg(RN.Sub); + // In practice, this isn't supposed to see any defs. + assert(!Op.isDef() && "Not expecting a def"); + } + } +} + + +/// For a given conditional copy, predicate the definition of the source of +/// the copy under the given condition (using the same predicate register as +/// the copy). +bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) { + // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi). + unsigned Opc = TfrI->getOpcode(); + (void)Opc; + assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf); + DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false") + << ": " << *TfrI); + + MachineOperand &MD = TfrI->getOperand(0); + MachineOperand &MP = TfrI->getOperand(1); + MachineOperand &MS = TfrI->getOperand(2); + // The source operand should be a <kill>. This is not strictly necessary, + // but it makes things a lot simpler. Otherwise, we would need to rename + // some registers, which would complicate the transformation considerably. + if (!MS.isKill()) + return false; + + RegisterRef RT(MS); + unsigned PredR = MP.getReg(); + MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond); + if (!DefI || !isPredicable(DefI)) + return false; + + DEBUG(dbgs() << "Source def: " << *DefI); + + // Collect the information about registers defined and used between the + // DefI and the TfrI. + // Map: reg -> bitmask of subregs + ReferenceMap Uses, Defs; + MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI; + + // Check if the predicate register is valid between DefI and TfrI. + // If it is, we can then ignore instructions predicated on the negated + // conditions when collecting def and use information. + bool PredValid = true; + for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { + if (!I->modifiesRegister(PredR, 0)) + continue; + PredValid = false; + break; + } + + for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { + MachineInstr *MI = &*I; + // If this instruction is predicated on the same register, it could + // potentially be ignored. 
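ReferenceMap and the Exec_* flags used below are declared earlier in this file. A plausible shape, stated as an assumption rather than the actual code:

// Hypothetical reconstruction: Reg -> bitmask describing which subregisters
// were referenced, recorded separately per execution condition.
typedef DenseMap<unsigned,unsigned> ReferenceMap;
enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = Sub_Low | Sub_High };
enum { Exec_Then = 0x10, Exec_Else = 0x20 };
// addRefToMap(RR, Map, Exec) would record RR.Sub in Map[RR.Reg] under the
// given Exec flag; isRefInMap(RR, Map, Exec) would test whether a matching
// reference was recorded under that flag.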
+    // By default assume that the instruction executes on the same condition
+    // as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
+    unsigned Exec = Exec_Then | Exec_Else;
+    if (PredValid && HII->isPredicated(MI) && MI->readsRegister(PredR))
+      Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else;
+
+    for (auto &Op : MI->operands()) {
+      if (!Op.isReg())
+        continue;
+      // We don't want to deal with physical registers. The reason is that
+      // they can be aliased with other physical registers. Aliased virtual
+      // registers must share the same register number, and can only differ
+      // in the subregisters, which we are keeping track of. Physical
+      // registers are not tracked this way: their super- and subregisters
+      // are other physical registers, and we are not checking that.
+      RegisterRef RR = Op;
+      if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+        return false;
+
+      ReferenceMap &Map = Op.isDef() ? Defs : Uses;
+      addRefToMap(RR, Map, Exec);
+    }
+  }
+
+  // The situation:
+  //   RT = DefI
+  //   ...
+  //   RD = TfrI ..., RT
+
+  // If the register-in-the-middle (RT) is used or redefined between
+  // DefI and TfrI, we may not be able to proceed with this transformation.
+  // We can ignore a def that will not execute together with TfrI, and a
+  // use that will. If there is such a use (that does execute together with
+  // TfrI), we will not be able to move DefI down. If there is a use that
+  // executes if TfrI's condition is false, then RT must be available
+  // unconditionally (cannot be predicated).
+  // Essentially, we need to be able to rename RT to RD in this segment.
+  if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else))
+    return false;
+  RegisterRef RD = MD;
+  // If the predicate register is defined between DefI and TfrI, the only
+  // potential thing to do would be to move the DefI down to TfrI, and then
+  // predicate. The reaching def (DefI) must be movable down to the location
+  // of the TfrI.
+  // If the target register of the TfrI (RD) is not used or defined between
+  // DefI and TfrI, consider moving TfrI up to DefI.
+  bool CanUp =   canMoveOver(TfrI, Defs, Uses);
+  bool CanDown = canMoveOver(DefI, Defs, Uses);
+  // The TfrI does not access memory, but DefI could. Check if it's safe
+  // to move DefI down to TfrI.
+  if (DefI->mayLoad() || DefI->mayStore())
+    if (!canMoveMemTo(DefI, TfrI, true))
+      CanDown = false;
+
+  DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
+               << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
+  MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
+  if (CanUp)
+    predicateAt(RD, DefI, PastDefIt, PredR, Cond);
+  else if (CanDown)
+    predicateAt(RD, DefI, TfrIt, PredR, Cond);
+  else
+    return false;
+
+  if (RT != RD)
+    renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt);
+
+  // Delete the user of RT first (it should work either way, but this order
+  // of deleting is more natural).
+  removeInstrFromLiveness(TfrI);
+  removeInstrFromLiveness(DefI);
+  return true;
+}
+
+
+/// Predicate all cases of conditional copies in the specified block.
+bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) { + bool Changed = false; + MachineBasicBlock::iterator I, E, NextI; + for (I = B.begin(), E = B.end(); I != E; I = NextI) { + NextI = std::next(I); + unsigned Opc = I->getOpcode(); + if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) { + bool Done = predicate(I, (Opc == Hexagon::A2_tfrt)); + if (!Done) { + // If we didn't predicate I, we may need to remove it in case it is + // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1. + if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) + removeInstrFromLiveness(I); + } + Changed |= Done; + } + } + return Changed; +} + + +void HexagonExpandCondsets::removeImplicitUses(MachineInstr *MI) { + for (unsigned i = MI->getNumOperands(); i > 0; --i) { + MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && MO.isUse() && MO.isImplicit()) + MI->RemoveOperand(i-1); + } +} + + +void HexagonExpandCondsets::removeImplicitUses(MachineBasicBlock &B) { + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + if (HII->isPredicated(MI)) + removeImplicitUses(MI); + } +} + + +void HexagonExpandCondsets::postprocessUndefImplicitUses(MachineBasicBlock &B) { + // Implicit uses that are "undef" are only meaningful (outside of the + // internals of this pass) when the instruction defines a subregister, + // and the implicit-undef use applies to the defined register. In such + // cases, the proper way to record the information in the IR is to mark + // the definition as "undef", which will be interpreted as "read-undef". + typedef SmallSet<unsigned,2> RegisterSet; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + RegisterSet Undefs; + for (unsigned i = MI->getNumOperands(); i > 0; --i) { + MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.isUndef()) { + MI->RemoveOperand(i-1); + Undefs.insert(MO.getReg()); + } + } + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || !Op.getSubReg()) + continue; + if (Undefs.count(Op.getReg())) + Op.setIsUndef(true); + } + } +} + + +bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg); + if (RC == &Hexagon::IntRegsRegClass) { + BW = 32; + return true; + } + if (RC == &Hexagon::DoubleRegsRegClass) { + BW = (RR.Sub != 0) ? 32 : 64; + return true; + } + return false; +} + + +bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + LiveRange::Segment &LR = *I; + // Range must start at a register... + if (!LR.start.isRegister()) + return false; + // ...and end in a register or in a dead slot. + if (!LR.end.isRegister() && !LR.end.isDead()) + return false; + } + return true; +} + + +bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { + if (CoaLimitActive) { + if (CoaCounter >= CoaLimit) + return false; + CoaCounter++; + } + unsigned BW1, BW2; + if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2) + return false; + if (MRI->isLiveIn(R1.Reg)) + return false; + if (MRI->isLiveIn(R2.Reg)) + return false; + + LiveInterval &L1 = LIS->getInterval(R1.Reg); + LiveInterval &L2 = LIS->getInterval(R2.Reg); + bool Overlap = L1.overlaps(L2); + + DEBUG(dbgs() << "compatible registers: (" + << (Overlap ? 
"overlap" : "disjoint") << ")\n " + << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n " + << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n"); + if (R1.Sub || R2.Sub) + return false; + if (Overlap) + return false; + + // Coalescing could have a negative impact on scheduling, so try to limit + // to some reasonable extent. Only consider coalescing segments, when one + // of them does not cross basic block boundaries. + if (!isIntraBlocks(L1) && !isIntraBlocks(L2)) + return false; + + MRI->replaceRegWith(R2.Reg, R1.Reg); + + // Move all live segments from L2 to L1. + typedef DenseMap<VNInfo*,VNInfo*> ValueInfoMap; + ValueInfoMap VM; + for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) { + VNInfo *NewVN, *OldVN = I->valno; + ValueInfoMap::iterator F = VM.find(OldVN); + if (F == VM.end()) { + NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator()); + VM.insert(std::make_pair(OldVN, NewVN)); + } else { + NewVN = F->second; + } + L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN)); + } + while (L2.begin() != L2.end()) + L2.removeSegment(*L2.begin()); + + updateKillFlags(R1.Reg, L1); + DEBUG(dbgs() << "coalesced: " << L1 << "\n"); + L1.verify(); + + return true; +} + + +/// Attempt to coalesce one of the source registers to a MUX intruction with +/// the destination register. This could lead to having only one predicated +/// instruction in the end instead of two. +bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) { + SmallVector<MachineInstr*,16> Condsets; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock &B = *I; + for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) { + MachineInstr *MI = &*J; + if (!isCondset(MI)) + continue; + MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3); + if (!S1.isReg() && !S2.isReg()) + continue; + Condsets.push_back(MI); + } + } + + bool Changed = false; + for (unsigned i = 0, n = Condsets.size(); i < n; ++i) { + MachineInstr *CI = Condsets[i]; + RegisterRef RD = CI->getOperand(0); + RegisterRef RP = CI->getOperand(1); + MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3); + bool Done = false; + // Consider this case: + // vreg1 = instr1 ... + // vreg2 = instr2 ... + // vreg0 = C2_mux ..., vreg1, vreg2 + // If vreg0 was coalesced with vreg1, we could end up with the following + // code: + // vreg0 = instr1 ... + // vreg2 = instr2 ... + // vreg0 = A2_tfrf ..., vreg2 + // which will later become: + // vreg0 = instr1 ... + // vreg0 = instr2_cNotPt ... + // i.e. there will be an unconditional definition (instr1) of vreg0 + // followed by a conditional one. The output dependency was there before + // and it unavoidable, but if instr1 is predicable, we will no longer be + // able to predicate it here. + // To avoid this scenario, don't coalesce the destination register with + // a source register that is defined by a predicable instruction. 
+    if (S1.isReg()) {
+      RegisterRef RS = S1;
+      MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true);
+      if (!RDef || !HII->isPredicable(RDef))
+        Done = coalesceRegisters(RD, RegisterRef(S1));
+    }
+    if (!Done && S2.isReg()) {
+      RegisterRef RS = S2;
+      MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false);
+      if (!RDef || !HII->isPredicable(RDef))
+        Done = coalesceRegisters(RD, RegisterRef(S2));
+    }
+    Changed |= Done;
+  }
+  return Changed;
+}
+
+
+bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
+  HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo());
+  TRI = MF.getSubtarget().getRegisterInfo();
+  LIS = &getAnalysis<LiveIntervals>();
+  MRI = &MF.getRegInfo();
+
+  bool Changed = false;
+
+  // Try to coalesce the target of a mux with one of its sources.
+  // This could eliminate a register copy in some circumstances.
+  Changed |= coalesceSegments(MF);
+
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    // First, simply split all muxes into a pair of conditional transfers
+    // and update the live intervals to reflect the new arrangement. This is
+    // done mainly to make the live interval update simpler than it would be
+    // while trying to predicate instructions at the same time.
+    Changed |= splitInBlock(*I);
+    // Traverse all blocks and collapse predicable instructions feeding
+    // conditional transfers into predicated instructions.
+    // Walk over all the instructions again, so we may catch pre-existing
+    // cases that were not created in the previous step.
+    Changed |= predicateInBlock(*I);
+  }
+
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+    postprocessUndefImplicitUses(*I);
+  return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+static void initializePassOnce(PassRegistry &Registry) {
+  const char *Name = "Hexagon Expand Condsets";
+  PassInfo *PI = new PassInfo(Name, "expand-condsets",
+        &HexagonExpandCondsets::ID, 0, false, false);
+  Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonExpandCondsetsPass(PassRegistry &Registry) {
+  CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
+FunctionPass *llvm::createHexagonExpandCondsets() {
+  return new HexagonExpandCondsets();
+}
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index a2a847c47a20..40059fb27371 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -20,7 +20,6 @@
 #include "Hexagon.h"
 #include "HexagonMachineFunctionInfo.h"
 #include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -49,13 +48,9 @@ namespace llvm {
 namespace {
 
 class HexagonExpandPredSpillCode : public MachineFunctionPass {
-    const HexagonTargetMachine& QTM;
-    const HexagonSubtarget &QST;
-
 public:
     static char ID;
-    HexagonExpandPredSpillCode(const HexagonTargetMachine& TM) :
-      MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+    HexagonExpandPredSpillCode() : MachineFunctionPass(ID) {
      PassRegistry &Registry = *PassRegistry::getPassRegistry();
      initializeHexagonExpandPredSpillCodePass(Registry);
    }
@@ -72,7 +67,8 @@ char HexagonExpandPredSpillCode::ID = 0;
 
 bool
HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
-  const HexagonInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
+  const HexagonSubtarget &QST = Fn.getSubtarget<HexagonSubtarget>();
+  const HexagonInstrInfo *TII = QST.getInstrInfo();
 
   // Loop over all of the basic blocks.
   for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -83,20 +79,177 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
        ++MII) {
       MachineInstr *MI = MII;
       int Opc = MI->getOpcode();
-      if (Opc == Hexagon::STriw_pred) {
+      if (Opc == Hexagon::S2_storerb_pci_pseudo ||
+          Opc == Hexagon::S2_storerh_pci_pseudo ||
+          Opc == Hexagon::S2_storeri_pci_pseudo ||
+          Opc == Hexagon::S2_storerd_pci_pseudo ||
+          Opc == Hexagon::S2_storerf_pci_pseudo) {
+        unsigned Opcode;
+        if (Opc == Hexagon::S2_storerd_pci_pseudo)
+          Opcode = Hexagon::S2_storerd_pci;
+        else if (Opc == Hexagon::S2_storeri_pci_pseudo)
+          Opcode = Hexagon::S2_storeri_pci;
+        else if (Opc == Hexagon::S2_storerh_pci_pseudo)
+          Opcode = Hexagon::S2_storerh_pci;
+        else if (Opc == Hexagon::S2_storerf_pci_pseudo)
+          Opcode = Hexagon::S2_storerf_pci;
+        else if (Opc == Hexagon::S2_storerb_pci_pseudo)
+          Opcode = Hexagon::S2_storerb_pci;
+        else
+          llvm_unreachable("wrong Opc");
+        MachineOperand &Op0 = MI->getOperand(0);
+        MachineOperand &Op1 = MI->getOperand(1);
+        MachineOperand &Op2 = MI->getOperand(2);
+        MachineOperand &Op3 = MI->getOperand(3);  // Modifier value.
+        MachineOperand &Op4 = MI->getOperand(4);
+        // Emit "C6 = Rn"; C6 is the control register for M0.
+        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+                Hexagon::C6)->addOperand(Op3);
+        // Replace the pseudo circ_std by the real circ_std.
+        MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+                                      TII->get(Opcode));
+        NewMI->addOperand(Op0);
+        NewMI->addOperand(Op1);
+        NewMI->addOperand(Op4);
+        NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+                                                    false, /*isDef*/
+                                                    false, /*isImpl*/
+                                                    true /*isKill*/));
+        NewMI->addOperand(Op2);
+        MII = MBB->erase(MI);
+        --MII;
+      } else if (Opc == Hexagon::L2_loadrd_pci_pseudo ||
+                 Opc == Hexagon::L2_loadri_pci_pseudo ||
+                 Opc == Hexagon::L2_loadrh_pci_pseudo ||
+                 Opc == Hexagon::L2_loadruh_pci_pseudo ||
+                 Opc == Hexagon::L2_loadrb_pci_pseudo ||
+                 Opc == Hexagon::L2_loadrub_pci_pseudo) {
+        unsigned Opcode;
+        if (Opc == Hexagon::L2_loadrd_pci_pseudo)
+          Opcode = Hexagon::L2_loadrd_pci;
+        else if (Opc == Hexagon::L2_loadri_pci_pseudo)
+          Opcode = Hexagon::L2_loadri_pci;
+        else if (Opc == Hexagon::L2_loadrh_pci_pseudo)
+          Opcode = Hexagon::L2_loadrh_pci;
+        else if (Opc == Hexagon::L2_loadruh_pci_pseudo)
+          Opcode = Hexagon::L2_loadruh_pci;
+        else if (Opc == Hexagon::L2_loadrb_pci_pseudo)
+          Opcode = Hexagon::L2_loadrb_pci;
+        else if (Opc == Hexagon::L2_loadrub_pci_pseudo)
+          Opcode = Hexagon::L2_loadrub_pci;
+        else
+          llvm_unreachable("wrong Opc");
+
+        MachineOperand &Op0 = MI->getOperand(0);
+        MachineOperand &Op1 = MI->getOperand(1);
+        MachineOperand &Op2 = MI->getOperand(2);
+        MachineOperand &Op4 = MI->getOperand(4);  // Modifier value.
+        MachineOperand &Op5 = MI->getOperand(5);
+        // Emit "C6 = Rn"; C6 is the control register for M0.
+        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+                Hexagon::C6)->addOperand(Op4);
+        // Replace the pseudo circ_ldd by the real circ_ldd.
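An illustrative aside, not part of the patch; the registers are made up and the assembly syntax is approximate. Every circular-addressing pseudo expands to the same two-instruction shape, e.g. for a word load:

//   c6 = r5                     ; A2_tfrrcr moves the modifier value; C6
//                               ; aliases the M0 modifier register
//   r0 = memw(r1++#4:circ(m0))  ; the real L2_loadri_pci, with M0 as an
//                               ; implicit (killed) use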
+        MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+                                      TII->get(Opcode));
+        NewMI->addOperand(Op1);
+        NewMI->addOperand(Op0);
+        NewMI->addOperand(Op2);
+        NewMI->addOperand(Op5);
+        NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+                                                    false, /*isDef*/
+                                                    false, /*isImpl*/
+                                                    true /*isKill*/));
+        MII = MBB->erase(MI);
+        --MII;
+      } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo ||
+                 Opc == Hexagon::L2_loadri_pbr_pseudo ||
+                 Opc == Hexagon::L2_loadrh_pbr_pseudo ||
+                 Opc == Hexagon::L2_loadruh_pbr_pseudo ||
+                 Opc == Hexagon::L2_loadrb_pbr_pseudo ||
+                 Opc == Hexagon::L2_loadrub_pbr_pseudo) {
+        unsigned Opcode;
+        if (Opc == Hexagon::L2_loadrd_pbr_pseudo)
+          Opcode = Hexagon::L2_loadrd_pbr;
+        else if (Opc == Hexagon::L2_loadri_pbr_pseudo)
+          Opcode = Hexagon::L2_loadri_pbr;
+        else if (Opc == Hexagon::L2_loadrh_pbr_pseudo)
+          Opcode = Hexagon::L2_loadrh_pbr;
+        else if (Opc == Hexagon::L2_loadruh_pbr_pseudo)
+          Opcode = Hexagon::L2_loadruh_pbr;
+        else if (Opc == Hexagon::L2_loadrb_pbr_pseudo)
+          Opcode = Hexagon::L2_loadrb_pbr;
+        else if (Opc == Hexagon::L2_loadrub_pbr_pseudo)
+          Opcode = Hexagon::L2_loadrub_pbr;
+        else
+          llvm_unreachable("wrong Opc");
+        MachineOperand &Op0 = MI->getOperand(0);
+        MachineOperand &Op1 = MI->getOperand(1);
+        MachineOperand &Op2 = MI->getOperand(2);
+        MachineOperand &Op4 = MI->getOperand(4);  // Modifier value.
+        // Emit "C6 = Rn"; C6 is the control register for M0.
+        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+                Hexagon::C6)->addOperand(Op4);
+        // Replace the pseudo brev_ldd by the real brev_ldd.
+        MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+                                      TII->get(Opcode));
+        NewMI->addOperand(Op1);
+        NewMI->addOperand(Op0);
+        NewMI->addOperand(Op2);
+        NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+                                                    false, /*isDef*/
+                                                    false, /*isImpl*/
+                                                    true /*isKill*/));
+        MII = MBB->erase(MI);
+        --MII;
+      } else if (Opc == Hexagon::S2_storerd_pbr_pseudo ||
+                 Opc == Hexagon::S2_storeri_pbr_pseudo ||
+                 Opc == Hexagon::S2_storerh_pbr_pseudo ||
+                 Opc == Hexagon::S2_storerb_pbr_pseudo ||
+                 Opc == Hexagon::S2_storerf_pbr_pseudo) {
+        unsigned Opcode;
+        if (Opc == Hexagon::S2_storerd_pbr_pseudo)
+          Opcode = Hexagon::S2_storerd_pbr;
+        else if (Opc == Hexagon::S2_storeri_pbr_pseudo)
+          Opcode = Hexagon::S2_storeri_pbr;
+        else if (Opc == Hexagon::S2_storerh_pbr_pseudo)
+          Opcode = Hexagon::S2_storerh_pbr;
+        else if (Opc == Hexagon::S2_storerf_pbr_pseudo)
+          Opcode = Hexagon::S2_storerf_pbr;
+        else if (Opc == Hexagon::S2_storerb_pbr_pseudo)
+          Opcode = Hexagon::S2_storerb_pbr;
+        else
+          llvm_unreachable("wrong Opc");
+        MachineOperand &Op0 = MI->getOperand(0);
+        MachineOperand &Op1 = MI->getOperand(1);
+        MachineOperand &Op2 = MI->getOperand(2);
+        MachineOperand &Op3 = MI->getOperand(3);  // Modifier value.
+        // Emit "C6 = Rn"; C6 is the control register for M0.
+        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+                Hexagon::C6)->addOperand(Op3);
+        // Replace the pseudo brev_std by the real brev_std.
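Aside, not part of the patch: every branch above and below ends with the same iterator idiom when deleting the expanded pseudo.

// MII = MBB->erase(MI);  // erase() returns an iterator to the instruction
//                        // that followed the erased pseudo
// --MII;                 // step back so the enclosing loop's ++MII resumes
//                        // exactly at that following instruction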
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op0); + NewMI->addOperand(Op1); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + NewMI->addOperand(Op2); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::STriw_pred) { // STriw_pred [R30], ofst, SrcReg; unsigned FP = MI->getOperand(0).getReg(); - assert( - FP == - QTM.getSubtargetImpl()->getRegisterInfo()->getFrameRegister() && - "Not a Frame Pointer, Nor a Spill Slot"); + assert(FP == QST.getRegisterInfo()->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); assert(MI->getOperand(1).isImm() && "Not an offset"); int Offset = MI->getOperand(1).getImm(); int SrcReg = MI->getOperand(2).getReg(); assert(Hexagon::PredRegsRegClass.contains(SrcReg) && "Not a predicate register"); if (!TII->isValidOffset(Hexagon::S2_storeri_io, Offset)) { - if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { + if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) { BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::CONST32_Int_Real), HEXAGON_RESERVED_REG_1).addImm(Offset); @@ -110,7 +263,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { .addReg(HEXAGON_RESERVED_REG_1) .addImm(0).addReg(HEXAGON_RESERVED_REG_2); } else { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi), HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), HEXAGON_RESERVED_REG_2).addReg(SrcReg); @@ -135,14 +288,12 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { assert(Hexagon::PredRegsRegClass.contains(DstReg) && "Not a predicate register"); unsigned FP = MI->getOperand(1).getReg(); - assert( - FP == - QTM.getSubtargetImpl()->getRegisterInfo()->getFrameRegister() && - "Not a Frame Pointer, Nor a Spill Slot"); + assert(FP == QST.getRegisterInfo()->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); assert(MI->getOperand(2).isImm() && "Not an offset"); int Offset = MI->getOperand(2).getImm(); if (!TII->isValidOffset(Hexagon::L2_loadri_io, Offset)) { - if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { + if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) { BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::CONST32_Int_Real), HEXAGON_RESERVED_REG_1).addImm(Offset); @@ -157,7 +308,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), DstReg).addReg(HEXAGON_RESERVED_REG_2); } else { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi), HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), HEXAGON_RESERVED_REG_2) @@ -200,6 +351,6 @@ void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) { } FunctionPass* -llvm::createHexagonExpandPredSpillCode(const HexagonTargetMachine &TM) { - return new HexagonExpandPredSpillCode(TM); +llvm::createHexagonExpandPredSpillCode() { + return new HexagonExpandPredSpillCode(); } diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index e8d8f1497bdb..3d786a92b9e5 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -6,9 +6,8 @@ // 
License. See LICENSE.TXT for details.
 //
 // The loop start address in the LOOPn instruction is encoded as a distance
-// from the LOOPn instruction itself. If the start address is too far from
-// the LOOPn instruction, the loop needs to be set up manually, i.e. via
-// direct transfers to SAn and LCn.
+// from the LOOPn instruction itself. If the start address is too far from
+// the LOOPn instruction, the instruction needs to use a constant extender.
 // This pass will identify and convert such LOOPn instructions to a proper
 // form.
 //===----------------------------------------------------------------------===//
@@ -21,12 +20,15 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/PassSupport.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
 using namespace llvm;
 
+static cl::opt<unsigned> MaxLoopRange(
+    "hexagon-loop-range", cl::Hidden, cl::init(200),
+    cl::desc("Restrict range of loopN instructions (testing only)"));
+
 namespace llvm {
   void initializeHexagonFixupHwLoopsPass(PassRegistry&);
 }
@@ -52,20 +54,15 @@ namespace {
     }
 
   private:
-    /// \brief Maximum distance between the loop instr and the basic block.
-    /// Just an estimate.
-    static const unsigned MAX_LOOP_DISTANCE = 200;
-
     /// \brief Check the offset between each loop instruction and
     /// the loop basic block to determine if we can use the LOOP instruction
     /// or if we need to set the LC/SA registers explicitly.
     bool fixupLoopInstrs(MachineFunction &MF);
 
-    /// \brief Add the instruction to set the LC and SA registers explicitly.
-    void convertLoopInstr(MachineFunction &MF,
-                          MachineBasicBlock::iterator &MII,
-                          RegScavenger &RS);
-
+    /// \brief Replace the loop instruction with the constant extended
+    /// version if the loop label is too far from the loop instruction.
+    void useExtLoopInstr(MachineFunction &MF,
+                         MachineBasicBlock::iterator &MII);
  };

  char HexagonFixupHwLoops::ID = 0;
@@ -78,20 +75,18 @@
 FunctionPass *llvm::createHexagonFixupHwLoops() {
   return new HexagonFixupHwLoops();
 }
 
-
 /// \brief Returns true if the instruction is a hardware loop instruction.
 static bool isHardwareLoop(const MachineInstr *MI) {
   return MI->getOpcode() == Hexagon::J2_loop0r ||
-         MI->getOpcode() == Hexagon::J2_loop0i;
+         MI->getOpcode() == Hexagon::J2_loop0i ||
+         MI->getOpcode() == Hexagon::J2_loop1r ||
+         MI->getOpcode() == Hexagon::J2_loop1i;
 }
 
-
 bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
-  bool Changed = fixupLoopInstrs(MF);
-  return Changed;
+  return fixupLoopInstrs(MF);
 }
 
-
-/// \brief For Hexagon, if the loop label is to far from the
+/// \brief For Hexagon, if the loop label is too far from the
 /// loop instruction then we need to set the LC0 and SA0 registers
 /// explicitly instead of using LOOP(start,count). This function
@@ -105,41 +100,49 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
   // Offset of the current instruction from the start.
   unsigned InstOffset = 0;
   // Map each basic block to the offset of its first instruction.
-  DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+  DenseMap<const MachineBasicBlock *, unsigned> BlockToInstOffset;
+
+  const HexagonInstrInfo *HII =
+      static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
 
   // First pass - compute the offset of each basic block.
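An illustrative aside, not part of the patch: a worked example of the alignment padding applied in the first pass below. For a block with alignment 2^4 = 16 bytes reached at InstOffset 37:

//   ByteAlign  = (1u << 4) - 1   = 15
//   InstOffset = (37 + 15) & ~15 = 48   // next 16-byte boundary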
- for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end(); - MBB != MBBe; ++MBB) { - BlockToInstOffset[MBB] = InstOffset; - InstOffset += (MBB->size() * 4); + for (const MachineBasicBlock &MBB : MF) { + if (MBB.getAlignment()) { + // Although we don't know the exact layout of the final code, we need + // to account for alignment padding somehow. This heuristic pads each + // aligned basic block according to the alignment value. + int ByteAlign = (1u << MBB.getAlignment()) - 1; + InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign); + } + + BlockToInstOffset[&MBB] = InstOffset; + for (const MachineInstr &MI : MBB) + InstOffset += HII->getSize(&MI); } - // Second pass - check each loop instruction to see if it needs to - // be converted. + // Second pass - check each loop instruction to see if it needs to be + // converted. InstOffset = 0; bool Changed = false; - RegScavenger RS; - - // Loop over all the basic blocks. - for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end(); - MBB != MBBe; ++MBB) { - InstOffset = BlockToInstOffset[MBB]; - RS.enterBasicBlock(MBB); + for (MachineBasicBlock &MBB : MF) { + InstOffset = BlockToInstOffset[&MBB]; // Loop over all the instructions. - MachineBasicBlock::iterator MIE = MBB->end(); - MachineBasicBlock::iterator MII = MBB->begin(); + MachineBasicBlock::iterator MII = MBB.begin(); + MachineBasicBlock::iterator MIE = MBB.end(); while (MII != MIE) { + InstOffset += HII->getSize(&*MII); + if (MII->isDebugValue()) { + ++MII; + continue; + } if (isHardwareLoop(MII)) { - RS.forward(MII); assert(MII->getOperand(0).isMBB() && "Expect a basic block as loop operand"); - int Sub = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()]; - unsigned Dist = Sub > 0 ? Sub : -Sub; - if (Dist > MAX_LOOP_DISTANCE) { - // Convert to explicity setting LC0 and SA0. - convertLoopInstr(MF, MII, RS); - MII = MBB->erase(MII); + int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()]; + if ((unsigned)abs(diff) > MaxLoopRange) { + useExtLoopInstr(MF, MII); + MII = MBB.erase(MII); Changed = true; } else { ++MII; @@ -147,39 +150,38 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { } else { ++MII; } - InstOffset += 4; } } return Changed; } - -/// \brief convert a loop instruction to a sequence of instructions that -/// set the LC0 and SA0 register explicitly. -void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF, - MachineBasicBlock::iterator &MII, - RegScavenger &RS) { +/// \brief Replace loop instructions with the constant extended version. +void HexagonFixupHwLoops::useExtLoopInstr(MachineFunction &MF, + MachineBasicBlock::iterator &MII) { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); MachineBasicBlock *MBB = MII->getParent(); DebugLoc DL = MII->getDebugLoc(); - unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0); - - // First, set the LC0 with the trip count. - if (MII->getOperand(1).isReg()) { - // Trip count is a register - BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0) - .addReg(MII->getOperand(1).getReg()); - } else { - // Trip count is an immediate. 
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrsi), Scratch) - .addImm(MII->getOperand(1).getImm()); - BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0) - .addReg(Scratch); + MachineInstrBuilder MIB; + unsigned newOp; + switch (MII->getOpcode()) { + case Hexagon::J2_loop0r: + newOp = Hexagon::J2_loop0rext; + break; + case Hexagon::J2_loop0i: + newOp = Hexagon::J2_loop0iext; + break; + case Hexagon::J2_loop1r: + newOp = Hexagon::J2_loop1rext; + break; + case Hexagon::J2_loop1i: + newOp = Hexagon::J2_loop1iext; + break; + default: + llvm_unreachable("Invalid Hardware Loop Instruction."); } - // Then, set the SA0 with the loop start address. - BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch) - .addMBB(MII->getOperand(0).getMBB()); - BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::SA0) - .addReg(Scratch); + MIB = BuildMI(*MBB, MII, DL, TII->get(newOp)); + + for (unsigned i = 0; i < MII->getNumOperands(); ++i) + MIB.addOperand(MII->getOperand(i)); } diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 9d1a527eddb3..0885a794a7b4 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -8,6 +8,8 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "hexagon-pei" + #include "HexagonFrameLowering.h" #include "Hexagon.h" #include "HexagonInstrInfo.h" @@ -16,334 +18,1274 @@ #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +// Hexagon stack frame layout as defined by the ABI: +// +// Incoming arguments +// passed via stack +// | +// | +// SP during function's FP during function's | +// +-- runtime (top of stack) runtime (bottom) --+ | +// | | | +// --++---------------------+------------------+-----------------++-+------- +// | parameter area for | variable-size | fixed-size |LR| arg +// | called functions | local objects | local objects |FP| +// --+----------------------+------------------+-----------------+--+------- +// <- size known -> <- size unknown -> <- size known -> +// +// Low address High address +// +// <--- stack growth +// +// +// - In any circumstances, the outgoing function arguments are always accessi- +// ble using the SP, and the incoming arguments are accessible using the FP. +// - If the local objects are not aligned, they can always be accessed using +// the FP. +// - If there are no variable-sized objects, the local objects can always be +// accessed using the SP, regardless whether they are aligned or not. 
(The +// alignment padding will be at the bottom of the stack (highest address), +// and so the offset with respect to the SP will be known at the compile- +// -time.) +// +// The only complication occurs if there are both, local aligned objects, and +// dynamically allocated (variable-sized) objects. The alignment pad will be +// placed between the FP and the local objects, thus preventing the use of the +// FP to access the local objects. At the same time, the variable-sized objects +// will be between the SP and the local objects, thus introducing an unknown +// distance from the SP to the locals. +// +// To avoid this problem, a new register is created that holds the aligned +// address of the bottom of the stack, referred in the sources as AP (aligned +// pointer). The AP will be equal to "FP-p", where "p" is the smallest pad +// that aligns AP to the required boundary (a maximum of the alignments of +// all stack objects, fixed- and variable-sized). All local objects[1] will +// then use AP as the base pointer. +// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get +// their name from being allocated at fixed locations on the stack, relative +// to the FP. In the presence of dynamic allocation and local alignment, such +// objects can only be accessed through the FP. +// +// Illustration of the AP: +// FP --+ +// | +// ---------------+---------------------+-----+-----------------------++-+-- +// Rest of the | Local stack objects | Pad | Fixed stack objects |LR| +// stack frame | (aligned) | | (CSR, spills, etc.) |FP| +// ---------------+---------------------+-----+-----------------+-----+--+-- +// |<-- Multiple of the -->| +// stack alignment +-- AP +// +// The AP is set up at the beginning of the function. Since it is not a dedi- +// cated (reserved) register, it needs to be kept live throughout the function +// to be available as the base register for local object accesses. +// Normally, an address of a stack objects is obtained by a pseudo-instruction +// TFR_FI. To access local objects with the AP register present, a different +// pseudo-instruction needs to be used: TFR_FIA. The TFR_FIA takes one extra +// argument compared to TFR_FI: the first input register is the AP register. +// This keeps the register live between its definition and its uses. + +// The AP register is originally set up using pseudo-instruction ALIGNA: +// AP = ALIGNA A +// where +// A - required stack alignment +// The alignment value must be the maximum of all alignments required by +// any stack object. + +// The dynamic allocation uses a pseudo-instruction ALLOCA: +// Rd = ALLOCA Rs, A +// where +// Rd - address of the allocated space +// Rs - minimum size (the actual allocated can be larger to accommodate +// alignment) +// A - required alignment + + using namespace llvm; -static cl::opt<bool> DisableDeallocRet( - "disable-hexagon-dealloc-ret", - cl::Hidden, - cl::desc("Disable Dealloc Return for Hexagon target")); +static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret", + cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target")); -/// determineFrameLayout - Determine the size of the frame and maximum call -/// frame size. -void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - // Get the number of bytes to allocate from the FrameInfo. 
- unsigned FrameSize = MFI->getStackSize(); +static cl::opt<int> NumberScavengerSlots("number-scavenger-slots", + cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2), + cl::ZeroOrMore); - // Get the alignments provided by the target. - unsigned TargetAlign = MF.getTarget() - .getSubtargetImpl() - ->getFrameLowering() - ->getStackAlignment(); - // Get the maximum call frame size of all the calls. - unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); +static cl::opt<int> SpillFuncThreshold("spill-func-threshold", + cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"), + cl::init(6), cl::ZeroOrMore); - // If we have dynamic alloca then maxCallFrameSize needs to be aligned so - // that allocations will be aligned. - if (MFI->hasVarSizedObjects()) - maxCallFrameSize = RoundUpToAlignment(maxCallFrameSize, TargetAlign); +static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os", + cl::Hidden, cl::desc("Specify Os spill func threshold"), + cl::init(1), cl::ZeroOrMore); - // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); +static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame", + cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Enable stack frame shrink wrapping")); - // Include call frame size in total. - FrameSize += maxCallFrameSize; +static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX), + cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame " + "shrink-wraps")); - // Make sure the frame is aligned. - FrameSize = RoundUpToAlignment(FrameSize, TargetAlign); +namespace { + /// Map a register pair Reg to the subregister that has the greater "number", + /// i.e. D3 (aka R7:6) will be mapped to R7, etc. + unsigned getMax32BitSubRegister(unsigned Reg, const TargetRegisterInfo &TRI, + bool hireg = true) { + if (Reg < Hexagon::D0 || Reg > Hexagon::D15) + return Reg; - // Update frame info. - MFI->setStackSize(FrameSize); + unsigned RegNo = 0; + for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) { + if (hireg) { + if (*SubRegs > RegNo) + RegNo = *SubRegs; + } else { + if (!RegNo || *SubRegs < RegNo) + RegNo = *SubRegs; + } + } + return RegNo; + } + + /// Returns the callee saved register with the largest id in the vector. + unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo &TRI) { + assert(Hexagon::R1 > 0 && + "Assume physical registers are encoded as positive integers"); + if (CSI.empty()) + return 0; + + unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI); + for (unsigned I = 1, E = CSI.size(); I < E; ++I) { + unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI); + if (Reg > Max) + Max = Reg; + } + return Max; + } + + /// Checks if the basic block contains any instruction that needs a stack + /// frame to be already in place. + bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR) { + for (auto &I : MBB) { + const MachineInstr *MI = &I; + if (MI->isCall()) + return true; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::ALLOCA: + case Hexagon::ALIGNA: + return true; + default: + break; + } + // Check individual operands. + for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) { + // While the presence of a frame index does not prove that a stack + // frame will be required, all frame indexes should be within alloc- + // frame/deallocframe. 
frame/deallocframe. Otherwise, the code that translates a frame + // index into an offset would have to be aware of the placement of + // the frame creation/destruction instructions. + if (Mo->isFI()) + return true; + if (!Mo->isReg()) + continue; + unsigned R = Mo->getReg(); + // Virtual registers will need scavenging, which then may require + // a stack slot. + if (TargetRegisterInfo::isVirtualRegister(R)) + return true; + if (CSR[R]) + return true; + } + } + return false; + } + + /// Returns true if MBB has a machine instruction that indicates a tail call + /// in the block. + bool hasTailCall(const MachineBasicBlock &MBB) { + MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(); + unsigned RetOpc = I->getOpcode(); + return RetOpc == Hexagon::TCRETURNi || RetOpc == Hexagon::TCRETURNr; + } + + /// Returns true if MBB contains an instruction that returns. + bool hasReturn(const MachineBasicBlock &MBB) { + for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I) + if (I->isReturn()) + return true; + return false; + } +} + + +/// Implements shrink-wrapping of the stack frame. By default, the stack frame +/// is created in the function entry block, and is cleaned up in every block +/// that returns. This function finds alternate blocks: one for the frame +/// setup (prolog) and one for the cleanup (epilog). +void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, + MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const { + static unsigned ShrinkCounter = 0; + + if (ShrinkLimit.getPosition()) { + if (ShrinkCounter >= ShrinkLimit) + return; + ShrinkCounter++; + } + + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HRI = *HST.getRegisterInfo(); + + MachineDominatorTree MDT; + MDT.runOnMachineFunction(MF); + MachinePostDominatorTree MPT; + MPT.runOnMachineFunction(MF); + + typedef DenseMap<unsigned,unsigned> UnsignedMap; + UnsignedMap RPO; + typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType; + RPOTType RPOT(&MF); + unsigned RPON = 0; + for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) + RPO[(*I)->getNumber()] = RPON++; + + // Don't process functions that have loops, at least for now. Placement + // of prolog and epilog must take loop structure into account. For simpli- + // city, don't do it right now. + for (auto &I : MF) { + unsigned BN = RPO[I.getNumber()]; + for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) { + // If a back-edge is found, return. + if (RPO[(*SI)->getNumber()] <= BN) + return; + } + } + + // Collect the set of blocks that need a stack frame to execute. Scan + // each block for uses/defs of callee-saved registers, calls, etc. + SmallVector<MachineBasicBlock*,16> SFBlocks; + BitVector CSR(Hexagon::NUM_TARGET_REGS); + for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P) + CSR[*P] = true; + + for (auto &I : MF) + if (needsStackFrame(I, CSR)) + SFBlocks.push_back(&I); + + DEBUG({ + dbgs() << "Blocks needing SF: {"; + for (auto &B : SFBlocks) + dbgs() << " BB#" << B->getNumber(); + dbgs() << " }\n"; + }); + // No frame needed? + if (SFBlocks.empty()) + return; + + // Pick a common dominator and a common post-dominator.
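The prolog block chosen next is the nearest common dominator of all frame-needing blocks, and the epilog their nearest common post-dominator. A toy standalone model of that query, assuming the dominator tree is given as an immediate-dominator array with node depths; this is illustrative only, not LLVM's MachineDominatorTree API:

#include <cstdio>
#include <vector>

// Walk the deeper node up its immediate-dominator chain until the two meet.
int nearestCommonDominator(const std::vector<int> &IDom,
                           const std::vector<int> &Depth, int A, int B) {
  while (A != B) {
    if (Depth[A] >= Depth[B])
      A = IDom[A];
    else
      B = IDom[B];
  }
  return A;
}

int main() {
  // Toy tree rooted at block 0 (IDom[0] == 0 by convention):
  //       0
  //      / \
  //     1   4
  //    / \
  //   2   3
  std::vector<int> IDom  = {0, 0, 1, 1, 0};
  std::vector<int> Depth = {0, 1, 2, 2, 1};
  printf("%d\n", nearestCommonDominator(IDom, Depth, 2, 3)); // 1
  printf("%d\n", nearestCommonDominator(IDom, Depth, 2, 4)); // 0
}

Folding this query over every block in SFBlocks, as the code below does, yields a single candidate block for the prolog.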
+ MachineBasicBlock *DomB = SFBlocks[0]; + for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) { + DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]); + if (!DomB) + break; + } + MachineBasicBlock *PDomB = SFBlocks[0]; + for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) { + PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]); + if (!PDomB) + break; + } + DEBUG({ + dbgs() << "Computed dom block: BB#"; + if (DomB) dbgs() << DomB->getNumber(); + else dbgs() << "<null>"; + dbgs() << ", computed pdom block: BB#"; + if (PDomB) dbgs() << PDomB->getNumber(); + else dbgs() << "<null>"; + dbgs() << "\n"; + }); + if (!DomB || !PDomB) + return; + + // Make sure that DomB dominates PDomB and PDomB post-dominates DomB. + if (!MDT.dominates(DomB, PDomB)) { + DEBUG(dbgs() << "Dom block does not dominate pdom block\n"); + return; + } + if (!MPT.dominates(PDomB, DomB)) { + DEBUG(dbgs() << "PDom block does not post-dominate dom block\n"); + return; + } + + // Finally, everything seems right. + PrologB = DomB; + EpilogB = PDomB; +} + +/// Perform most of the PEI work here: +/// - saving/restoring of the callee-saved registers, +/// - stack frame creation and destruction. +/// Normally, this work is distributed among various functions, but doing it +/// in one place allows shrink-wrapping of the stack frame. +void HexagonFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HRI = *HST.getRegisterInfo(); + + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr; + if (EnableShrinkWrapping) + findShrunkPrologEpilog(MF, PrologB, EpilogB); + + insertCSRSpillsInBlock(*PrologB, CSI, HRI); + insertPrologueInBlock(*PrologB); + + if (EpilogB) { + insertCSRRestoresInBlock(*EpilogB, CSI, HRI); + insertEpilogueInBlock(*EpilogB); + } else { + for (auto &B : MF) + if (!B.empty() && B.back().isReturn()) + insertCSRRestoresInBlock(B, CSI, HRI); + + for (auto &B : MF) + if (!B.empty() && B.back().isReturn()) + insertEpilogueInBlock(B); + } } -void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); +void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { + MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo &MMI = MF.getMMI(); MachineBasicBlock::iterator MBBI = MBB.begin(); - const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>( - MF.getSubtarget().getRegisterInfo()); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - determineFrameLayout(MF); + auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget()); + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + DebugLoc dl; + unsigned MaxAlign = std::max(MFI->getMaxAlignment(), getStackAlignment()); + + // Calculate the total stack frame size. // Get the number of bytes to allocate from the FrameInfo. - int NumBytes = (int) MFI->getStackSize(); + unsigned FrameSize = MFI->getStackSize(); + // Round up the max call frame size to the max alignment on the stack. 
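The rounding applied just below is ordinary round-up-to-a-multiple arithmetic. A small standalone sketch of the frame-size computation under invented sizes; the roundUp helper mirrors what the RoundUpToAlignment calls compute:

#include <cstdio>

// Round Value up to the next multiple of Align.
unsigned roundUp(unsigned Value, unsigned Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  unsigned MaxAlign = 16;  // max(MFI->getMaxAlignment(), stack alignment)
  unsigned Locals   = 20;  // MFI->getStackSize() before rounding
  unsigned MaxCall  = 24;  // MFI->getMaxCallFrameSize()

  unsigned MaxCFA    = roundUp(MaxCall, MaxAlign);          // 32
  unsigned FrameSize = MaxCFA + roundUp(Locals, MaxAlign);  // 32 + 32 = 64
  printf("MaxCFA=%u FrameSize=%u\n", MaxCFA, FrameSize);
}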
+ unsigned MaxCFA = RoundUpToAlignment(MFI->getMaxCallFrameSize(), MaxAlign); + MFI->setMaxCallFrameSize(MaxCFA); + + FrameSize = MaxCFA + RoundUpToAlignment(FrameSize, MaxAlign); + MFI->setStackSize(FrameSize); - // LLVM expects allocframe not to be the first instruction in the - // basic block. + bool AlignStack = (MaxAlign > getStackAlignment()); + + // Check if frame moves are needed for EH. + bool needsFrameMoves = MMI.hasDebugInfo() || + MF.getFunction()->needsUnwindTableEntry(); + + // Get the number of bytes to allocate from the FrameInfo. + unsigned NumBytes = MFI->getStackSize(); + unsigned SP = HRI.getStackRegister(); + unsigned MaxCF = MFI->getMaxCallFrameSize(); MachineBasicBlock::iterator InsertPt = MBB.begin(); - // - // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset. - // - HexagonMachineFunctionInfo *FuncInfo = - MF.getInfo<HexagonMachineFunctionInfo>(); - const std::vector<MachineInstr*>& AdjustRegs = - FuncInfo->getAllocaAdjustInsts(); - for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(), - e = AdjustRegs.end(); - i != e; ++i) { - MachineInstr* MI = *i; - assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) && - "Expected adjust alloca node"); + auto *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>(); + auto &AdjustRegs = FuncInfo->getAllocaAdjustInsts(); - MachineOperand& MO = MI->getOperand(2); - assert(MO.isImm() && "Expected immediate"); - MO.setImm(MFI->getMaxCallFrameSize()); + for (auto MI : AdjustRegs) { + assert((MI->getOpcode() == Hexagon::ALLOCA) && "Expected alloca"); + expandAlloca(MI, HII, SP, MaxCF); + MI->eraseFromParent(); } // - // Only insert ALLOCFRAME if we need to. + // Only insert ALLOCFRAME if we need to or at -O0 for the debugger. We + // think this shouldn't be required, but we do it for now because gcc does + // and gdb can't break at the start of the function without it. We will + // remove it if this turns out to be a gdb bug. // - if (hasFP(MF)) { - // Check for overflow. - // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used? - const int ALLOCFRAME_MAX = 16384; - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - - if (NumBytes >= ALLOCFRAME_MAX) { - // Emit allocframe(#0). - BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::S2_allocframe)).addImm(0); - - // Subtract offset from frame pointer. - BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real), - HEXAGON_RESERVED_REG_1).addImm(NumBytes); - BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::A2_sub), - QRI->getStackRegister()). - addReg(QRI->getStackRegister()). - addReg(HEXAGON_RESERVED_REG_1); - } else { - BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::S2_allocframe)).addImm(NumBytes); - } + bool NoOpt = (HTM.getOptLevel() == CodeGenOpt::None); + if (!NoOpt && !FuncInfo->hasClobberLR() && !hasFP(MF)) + return; + + // Check for overflow. + // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used? + const unsigned int ALLOCFRAME_MAX = 16384; + + // Create a dummy memory operand to keep allocframe from being treated as + // a volatile memory reference. + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, + 4, 4); + + if (NumBytes >= ALLOCFRAME_MAX) { + // Emit allocframe(#0). + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) + .addImm(0) + .addMemOperand(MMO); + + // Subtract offset from frame pointer. + // We use a caller-saved non-parameter register for that.
+ unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg(); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32_Int_Real), + CallerSavedReg).addImm(NumBytes); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP) + .addReg(SP) + .addReg(CallerSavedReg); + } else { + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) + .addImm(NumBytes) + .addMemOperand(MMO); } -} -// Returns true if MBB has a machine instructions that indicates a tail call -// in the block. -bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - unsigned RetOpcode = MBBI->getOpcode(); - return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext; + if (AlignStack) { + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP) + .addReg(SP) + .addImm(-int64_t(MaxAlign)); + } + + if (needsFrameMoves) { + std::vector<MCCFIInstruction> Instructions = MMI.getFrameInstructions(); + MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); + + // Advance CFA. DW_CFA_def_cfa + unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true); + unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true); + + // CFA = FP + 8 + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( + FrameLabel, DwFPReg, -8)); + BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // R31 (return addr) = CFA - #4 + CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + FrameLabel, DwRAReg, -4)); + BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // R30 (frame ptr) = CFA - #8 + CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + FrameLabel, DwFPReg, -8)); + BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + unsigned int regsToMove[] = { + Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2, + Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18, + Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22, + Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26, + Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, Hexagon::D10, + Hexagon::D11, Hexagon::D12, Hexagon::D13, Hexagon::NoRegister + }; + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + for (unsigned i = 0; regsToMove[i] != Hexagon::NoRegister; ++i) { + for (unsigned I = 0, E = CSI.size(); I < E; ++I) { + if (CSI[I].getReg() == regsToMove[i]) { + // Subtract 8 to make room for R30 and R31, which are added above. + int64_t Offset = getFrameIndexOffset(MF, CSI[I].getFrameIdx()) - 8; + + if (regsToMove[i] < Hexagon::D0 || regsToMove[i] > Hexagon::D15) { + unsigned DwarfReg = HRI.getDwarfRegNum(regsToMove[i], true); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(FrameLabel, + DwarfReg, Offset)); + BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } else { + // Split the double regs into subregs, and generate appropriate + // cfi_offsets. + // The only reason we split double regs is that llvm-mc does not + // understand paired registers for cfi_offset.
+ // Eg .cfi_offset r1:0, -64 + unsigned HiReg = getMax32BitSubRegister(regsToMove[i], HRI); + unsigned LoReg = getMax32BitSubRegister(regsToMove[i], HRI, false); + unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true); + unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true); + unsigned HiCFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(FrameLabel, + HiDwarfReg, Offset+4)); + BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(HiCFIIndex); + unsigned LoCFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(FrameLabel, + LoDwarfReg, Offset)); + BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(LoCFIIndex); + } + break; + } + } // for CSI.size() + } // for regsToMove + } // needsFrameMoves } -void HexagonFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = std::prev(MBB.end()); - DebugLoc dl = MBBI->getDebugLoc(); +void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { + MachineFunction &MF = *MBB.getParent(); // // Only insert deallocframe if we need to. Also at -O0. See comment - // in emitPrologue above. + // in insertPrologueInBlock above. // - if (hasFP(MF) || MF.getTarget().getOptLevel() == CodeGenOpt::None) { - MachineBasicBlock::iterator MBBI = std::prev(MBB.end()); - MachineBasicBlock::iterator MBBI_end = MBB.end(); - - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - // Handle EH_RETURN. - if (MBBI->getOpcode() == Hexagon::EH_RETURN_JMPR) { - assert(MBBI->getOperand(0).isReg() && "Offset should be in register!"); - BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe)); - BuildMI(MBB, MBBI, dl, TII.get(Hexagon::A2_add), - Hexagon::R29).addReg(Hexagon::R29).addReg(Hexagon::R28); - return; - } - // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher - // versions. - if (MF.getTarget().getSubtarget<HexagonSubtarget>().hasV4TOps() && - MBBI->getOpcode() == Hexagon::JMPret && !DisableDeallocRet) { - // Check for RESTORE_DEALLOC_RET_JMP_V4 call. Don't emit an extra DEALLOC - // instruction if we encounter it. - MachineBasicBlock::iterator BeforeJMPR = - MBB.begin() == MBBI ? MBBI : std::prev(MBBI); - if (BeforeJMPR != MBBI && - BeforeJMPR->getOpcode() == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) { - // Remove the JMPR node. - MBB.erase(MBBI); - return; - } + if (!hasFP(MF) && MF.getTarget().getOptLevel() != CodeGenOpt::None) + return; - // Add dealloc_return. - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::L4_return)); - // Transfer the function live-out registers. - MIB->copyImplicitOps(*MBB.getParent(), &*MBBI); - // Remove the JUMPR node. - MBB.erase(MBBI); - } else { // Add deallocframe for V2 and V3, and V4 tail calls. - // Check for RESTORE_DEALLOC_BEFORE_TAILCALL_V4. We don't need an extra - // DEALLOCFRAME instruction after it. - MachineBasicBlock::iterator Term = MBB.getFirstTerminator(); - MachineBasicBlock::iterator I = - Term == MBB.begin() ? MBB.end() : std::prev(Term); - if (I != MBB.end() && - I->getOpcode() == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4) - return; + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + unsigned SP = HRI.getStackRegister(); + + MachineInstr *RetI = nullptr; + for (auto &I : MBB) { + if (!I.isReturn()) + continue; + RetI = &I; + break; + } + unsigned RetOpc = RetI ? 
RetI->getOpcode() : 0; - BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe)); + MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator(); + DebugLoc DL; + if (InsertPt != MBB.end()) + DL = InsertPt->getDebugLoc(); + else if (!MBB.empty()) + DL = std::prev(MBB.end())->getDebugLoc(); + + // Handle EH_RETURN. + if (RetOpc == Hexagon::EH_RETURN_JMPR) { + BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); + BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP) + .addReg(SP) + .addReg(Hexagon::R28); + return; + } + + // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc- + // frame instruction if we encounter it. + if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) { + MachineBasicBlock::iterator It = RetI; + ++It; + // Delete all instructions after the RESTORE (except labels). + while (It != MBB.end()) { + if (!It->isLabel()) + It = MBB.erase(It); + else + ++It; } + return; + } + + // It is possible that the restoring code is a call to a library function. + // All of the restore* functions include "deallocframe", so we need to make + // sure that we don't add an extra one. + bool NeedsDeallocframe = true; + if (!MBB.empty() && InsertPt != MBB.begin()) { + MachineBasicBlock::iterator PrevIt = std::prev(InsertPt); + unsigned COpc = PrevIt->getOpcode(); + if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4) + NeedsDeallocframe = false; + } + + if (!NeedsDeallocframe) + return; + // If the returning instruction is JMPret, replace it with dealloc_return, + // otherwise just add deallocframe. The function could be returning via a + // tail call. + if (RetOpc != Hexagon::JMPret || DisableDeallocRet) { + BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); + return; } + unsigned NewOpc = Hexagon::L4_return; + MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc)); + // Transfer the function live-out registers. 
+ NewI->copyImplicitOps(MF, RetI); + MBB.erase(RetI); } + bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const HexagonMachineFunctionInfo *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>(); - return (MFI->hasCalls() || (MFI->getStackSize() > 0) || - FuncInfo->hasClobberLR() ); + return MFI->hasCalls() || MFI->getStackSize() > 0 || + FuncInfo->hasClobberLR(); } -static inline -unsigned uniqueSuperReg(unsigned Reg, const TargetRegisterInfo *TRI) { - MCSuperRegIterator SRI(Reg, TRI); - assert(SRI.isValid() && "Expected a superreg"); - unsigned SuperReg = *SRI; - ++SRI; - assert(!SRI.isValid() && "Expected exactly one superreg"); - return SuperReg; -} -bool -HexagonFrameLowering::spillCalleeSavedRegisters( - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - MachineFunction *MF = MBB.getParent(); - const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); +enum SpillKind { + SK_ToMem, + SK_FromMem, + SK_FromMemTailcall +}; - if (CSI.empty()) { - return false; +static const char * +getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType) { + const char * V4SpillToMemoryFunctions[] = { + "__save_r16_through_r17", + "__save_r16_through_r19", + "__save_r16_through_r21", + "__save_r16_through_r23", + "__save_r16_through_r25", + "__save_r16_through_r27" }; + + const char * V4SpillFromMemoryFunctions[] = { + "__restore_r16_through_r17_and_deallocframe", + "__restore_r16_through_r19_and_deallocframe", + "__restore_r16_through_r21_and_deallocframe", + "__restore_r16_through_r23_and_deallocframe", + "__restore_r16_through_r25_and_deallocframe", + "__restore_r16_through_r27_and_deallocframe" }; + + const char * V4SpillFromMemoryTailcallFunctions[] = { + "__restore_r16_through_r17_and_deallocframe_before_tailcall", + "__restore_r16_through_r19_and_deallocframe_before_tailcall", + "__restore_r16_through_r21_and_deallocframe_before_tailcall", + "__restore_r16_through_r23_and_deallocframe_before_tailcall", + "__restore_r16_through_r25_and_deallocframe_before_tailcall", + "__restore_r16_through_r27_and_deallocframe_before_tailcall" + }; + + const char **SpillFunc = nullptr; + + switch(SpillType) { + case SK_ToMem: + SpillFunc = V4SpillToMemoryFunctions; + break; + case SK_FromMem: + SpillFunc = V4SpillFromMemoryFunctions; + break; + case SK_FromMemTailcall: + SpillFunc = V4SpillFromMemoryTailcallFunctions; + break; + } + assert(SpillFunc && "Unknown spill kind"); + + // Spill all callee-saved registers up to the highest register used. + switch (MaxReg) { + case Hexagon::R17: + return SpillFunc[0]; + case Hexagon::R19: + return SpillFunc[1]; + case Hexagon::R21: + return SpillFunc[2]; + case Hexagon::R23: + return SpillFunc[3]; + case Hexagon::R25: + return SpillFunc[4]; + case Hexagon::R27: + return SpillFunc[5]; + default: + llvm_unreachable("Unhandled maximum callee save register"); } + return 0; +} - // We can only schedule double loads if we spill contiguous callee-saved regs - // For instance, we cannot scheduled double-word loads if we spill r24, - // r26, and r27. - // Hexagon_TODO: We can try to double-word align odd registers for -O2 and - // above. - bool ContiguousRegs = true; +/// Adds all callee-saved registers up to MaxReg to the instruction. +static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst, + unsigned MaxReg, bool IsDef) { + // Add the callee-saved registers as implicit uses. 
+ for (unsigned R = Hexagon::R16; R <= MaxReg; ++R) { + MachineOperand ImpUse = MachineOperand::CreateReg(R, IsDef, true); + Inst->addOperand(ImpUse); + } +} - for (unsigned i = 0; i < CSI.size(); ++i) { + +int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + return MF.getFrameInfo()->getObjectOffset(FI); +} + + +bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, + const CSIVect &CSI, const HexagonRegisterInfo &HRI) const { + if (CSI.empty()) + return true; + + MachineBasicBlock::iterator MI = MBB.begin(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + + if (useSpillFunction(MF, CSI)) { + unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI); + const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem); + // Call spill function. + DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); + MachineInstr *SaveRegsCall = + BuildMI(MBB, MI, DL, TII.get(Hexagon::SAVE_REGISTERS_CALL_V4)) + .addExternalSymbol(SpillFun); + // Add callee-saved registers as use. + addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false); + // Add live in registers. + for (unsigned I = 0; I < CSI.size(); ++I) + MBB.addLiveIn(CSI[I].getReg()); + return true; + } + + for (unsigned i = 0, n = CSI.size(); i < n; ++i) { unsigned Reg = CSI[i].getReg(); + // Add live in registers. We treat eh_return callee saved register r0 - r3 + // specially. They are not really callee saved registers as they are not + // supposed to be killed. + bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg); + int FI = CSI[i].getFrameIdx(); + const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI); + if (IsKill) + MBB.addLiveIn(Reg); + } + return true; +} - // - // Check if we can use a double-word store. - // - unsigned SuperReg = uniqueSuperReg(Reg, TRI); - bool CanUseDblStore = false; - const TargetRegisterClass* SuperRegClass = nullptr; - - if (ContiguousRegs && (i < CSI.size()-1)) { - unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI); - SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg); - CanUseDblStore = (SuperRegNext == SuperReg); - } +bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, + const CSIVect &CSI, const HexagonRegisterInfo &HRI) const { + if (CSI.empty()) + return false; + + MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - if (CanUseDblStore) { - TII.storeRegToStackSlot(MBB, MI, SuperReg, true, - CSI[i+1].getFrameIdx(), SuperRegClass, TRI); - MBB.addLiveIn(SuperReg); - ++i; + if (useRestoreFunction(MF, CSI)) { + bool HasTC = hasTailCall(MBB) || !hasReturn(MBB); + unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI); + SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem; + const char *RestoreFn = getSpillFunctionFor(MaxR, Kind); + + // Call spill function. + DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() + : MBB.getLastNonDebugInstr()->getDebugLoc(); + MachineInstr *DeallocCall = nullptr; + + if (HasTC) { + unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4; + DeallocCall = BuildMI(MBB, MI, DL, TII.get(ROpc)) + .addExternalSymbol(RestoreFn); } else { - // Cannot use a double-word store. 
- ContiguousRegs = false; - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC, - TRI); - MBB.addLiveIn(Reg); + // The block has a return. + MachineBasicBlock::iterator It = MBB.getFirstTerminator(); + assert(It->isReturn() && std::next(It) == MBB.end()); + unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4; + DeallocCall = BuildMI(MBB, It, DL, TII.get(ROpc)) + .addExternalSymbol(RestoreFn); + // Transfer the function live-out registers. + DeallocCall->copyImplicitOps(MF, It); } + addCalleeSaveRegistersAsImpOperand(DeallocCall, MaxR, true); + return true; + } + + for (unsigned i = 0; i < CSI.size(); ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); + int FI = CSI[i].getFrameIdx(); + TII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); } return true; } -bool HexagonFrameLowering::restoreCalleeSavedRegisters( - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { +void HexagonFrameLowering::eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { + MachineInstr &MI = *I; + unsigned Opc = MI.getOpcode(); + (void)Opc; // Silence compiler warning. + assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) && + "Cannot handle this call frame pseudo instruction"); + MBB.erase(I); +} + + +void HexagonFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + // If this function has uses aligned stack and also has variable sized stack + // objects, then we need to map all spill slots to fixed positions, so that + // they can be accessed through FP. Otherwise they would have to be accessed + // via AP, which may not be available at the particular place in the program. + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool HasAlloca = MFI->hasVarSizedObjects(); + bool HasAligna = (MFI->getMaxAlignment() > getStackAlignment()); + + if (!HasAlloca || !HasAligna) + return; + + unsigned LFS = MFI->getLocalFrameSize(); + int Offset = -LFS; + for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i)) + continue; + int S = MFI->getObjectSize(i); + LFS += S; + Offset -= S; + MFI->mapLocalFrameObject(i, Offset); + } + + MFI->setLocalFrameSize(LFS); + unsigned A = MFI->getLocalFrameMaxAlign(); + assert(A <= 8 && "Unexpected local frame alignment"); + if (A == 0) + MFI->setLocalFrameMaxAlign(8); + MFI->setUseLocalStackAllocationBlock(true); +} - MachineFunction *MF = MBB.getParent(); - const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); +/// Returns true if there are no caller-saved registers available. +static bool needToReserveScavengingSpillSlots(MachineFunction &MF, + const HexagonRegisterInfo &HRI) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const MCPhysReg *CallerSavedRegs = HRI.getCallerSavedRegs(&MF); + // Check for an unused caller-saved register. + for ( ; *CallerSavedRegs; ++CallerSavedRegs) { + MCPhysReg FreeReg = *CallerSavedRegs; + if (MRI.isPhysRegUsed(FreeReg)) + continue; - + // Check aliased register usage.
+ bool IsCurrentRegUsed = false; + for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI) + if (MRI.isPhysRegUsed(*AI)) { + IsCurrentRegUsed = true; + break; + } + if (IsCurrentRegUsed) + continue; + + // Neither directly used nor used through an aliased register. return false; } + // All caller-saved registers are used. + return true; +} - // We can only schedule double loads if we spill contiguous callee-saved regs - // For instance, we cannot scheduled double-word loads if we spill r24, - // r26, and r27. - // Hexagon_TODO: We can try to double-word align odd registers for -O2 and - // above. - bool ContiguousRegs = true; - for (unsigned i = 0; i < CSI.size(); ++i) { - unsigned Reg = CSI[i].getReg(); +/// Replaces the predicate spill code pseudo instructions by valid instructions. +bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF) + const { + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HII = *HST.getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool HasReplacedPseudoInst = false; + // Replace predicate spill pseudo instructions by real code. + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock* MBB = MBBb; + // Traverse the basic block. + MachineBasicBlock::iterator NextII; + for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + MII = NextII) { + MachineInstr *MI = MII; + NextII = std::next(MII); + int Opc = MI->getOpcode(); + if (Opc == Hexagon::STriw_pred) { + HasReplacedPseudoInst = true; + // STriw_pred FI, 0, SrcReg; + unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + unsigned SrcReg = MI->getOperand(2).getReg(); + bool IsOrigSrcRegKilled = MI->getOperand(2).isKill(); + + assert(MI->getOperand(0).isFI() && "Expect a frame index"); + assert(Hexagon::PredRegsRegClass.contains(SrcReg) && + "Not a predicate register"); + + // Insert transfer to general purpose register. + // VirtReg = C2_tfrpr SrcPredReg + BuildMI(*MBB, MII, MI->getDebugLoc(), HII.get(Hexagon::C2_tfrpr), + VirtReg).addReg(SrcReg, getKillRegState(IsOrigSrcRegKilled)); + + // Change instruction to S2_storeri_io. + // S2_storeri_io FI, 0, VirtReg + MI->setDesc(HII.get(Hexagon::S2_storeri_io)); + MI->getOperand(2).setReg(VirtReg); + MI->getOperand(2).setIsKill(); - // - // Check if we can use a double-word load. - // - unsigned SuperReg = uniqueSuperReg(Reg, TRI); - const TargetRegisterClass* SuperRegClass = nullptr; - bool CanUseDblLoad = false; - if (ContiguousRegs && (i < CSI.size()-1)) { - unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI); - SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg); - CanUseDblLoad = (SuperRegNext == SuperReg); + } else if (Opc == Hexagon::LDriw_pred) { + // DstReg = LDriw_pred FI, 0 + MachineOperand &M0 = MI->getOperand(0); + if (M0.isDead()) { + MBB->erase(MII); + continue; + } + + unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + unsigned DestReg = MI->getOperand(0).getReg(); + + assert(MI->getOperand(1).isFI() && "Expect a frame index"); + assert(Hexagon::PredRegsRegClass.contains(DestReg) && + "Not a predicate register"); + + // Change instruction to L2_loadri_io. + // VirtReg = L2_loadri_io FI, 0 + MI->setDesc(HII.get(Hexagon::L2_loadri_io)); + MI->getOperand(0).setReg(VirtReg); + + // Insert transfer to general purpose register. 
+ // DestReg = C2_tfrrp VirtReg + const MCInstrDesc &D = HII.get(Hexagon::C2_tfrrp); + BuildMI(*MBB, std::next(MII), MI->getDebugLoc(), D, DestReg) + .addReg(VirtReg, getKillRegState(true)); + HasReplacedPseudoInst = true; + } } + } + return HasReplacedPseudoInst; +} - if (CanUseDblLoad) { - TII.loadRegFromStackSlot(MBB, MI, SuperReg, CSI[i+1].getFrameIdx(), - SuperRegClass, TRI); - MBB.addLiveIn(SuperReg); - ++i; - } else { - // Cannot use a double-word load. - ContiguousRegs = false; - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI); - MBB.addLiveIn(Reg); +void HexagonFrameLowering::processFunctionBeforeCalleeSavedScan( + MachineFunction &MF, RegScavenger* RS) const { + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HRI = *HST.getRegisterInfo(); + + bool HasEHReturn = MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn(); + + // If we have a function containing __builtin_eh_return we want to spill and + // restore all callee saved registers. Pretend that they are used. + if (HasEHReturn) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *CSRegs = HRI.getCalleeSavedRegs(&MF); *CSRegs; + ++CSRegs) + if (!MRI.isPhysRegUsed(*CSRegs)) + MRI.setPhysRegUsed(*CSRegs); + } + + const TargetRegisterClass &RC = Hexagon::IntRegsRegClass; + + // Replace predicate register pseudo spill code. + bool HasReplacedPseudoInst = replacePredRegPseudoSpillCode(MF); + + // We need to reserve a spill slot if scavenging could potentially require + // spilling a scavenged register. + if (HasReplacedPseudoInst && needToReserveScavengingSpillSlots(MF, HRI)) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + for (int i=0; i < NumberScavengerSlots; i++) + RS->addScavengingFrameIndex( + MFI->CreateSpillStackObject(RC.getSize(), RC.getAlignment())); + } +} + + +#ifndef NDEBUG +static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) { + dbgs() << '{'; + for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) { + unsigned R = x; + dbgs() << ' ' << PrintReg(R, &TRI); + } + dbgs() << " }"; +} +#endif + + +bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << " on " + << MF.getFunction()->getName() << '\n'); + MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector SRegs(Hexagon::NUM_TARGET_REGS); + + // Generate a set of unique, callee-saved registers (SRegs), where each + // register in the set is maximal in terms of sub-/super-register relation, + // i.e. for each R in SRegs, no proper super-register of R is also in SRegs. + + // (1) For each callee-saved register, add that register and all of its + // sub-registers to SRegs. + DEBUG(dbgs() << "Initial CS registers: {"); + for (unsigned i = 0, n = CSI.size(); i < n; ++i) { + unsigned R = CSI[i].getReg(); + DEBUG(dbgs() << ' ' << PrintReg(R, TRI)); + for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) + SRegs[*SR] = true; + } + DEBUG(dbgs() << " }\n"); + DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + + // (2) For each reserved register, remove that register and all of its + // sub- and super-registers from SRegs.
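Steps (1) through (5) of this computation (the remaining steps continue below) amount to maximizing a register set under the sub-/super-register relation. A toy standalone model over a made-up register file of two pairs, with std::set standing in for BitVector:

#include <cstdio>
#include <iterator>
#include <set>
#include <string>

int main() {
  // Toy universe: R0..R3 plus the pairs D0 = R1:R0 and D1 = R3:R2.
  auto halves = [](const std::string &R) -> std::set<std::string> {
    if (R == "D0") return {"R0", "R1"};
    if (R == "D1") return {"R2", "R3"};
    return {};
  };
  auto pairOf = [](const std::string &R) -> std::string {
    if (R == "R0" || R == "R1") return "D0";
    if (R == "R2" || R == "R3") return "D1";
    return "";
  };

  std::set<std::string> SRegs = {"R0", "R1", "R2"};  // after steps (1)-(2)
  const std::set<std::string> Reserved = {"R3"};

  // Steps (3)-(4): add a pair if some half is present and none is reserved.
  for (const std::string R : {"D0", "D1"}) {
    bool AnyIn = false, AnyReserved = false;
    for (const auto &S : halves(R)) {
      AnyIn |= SRegs.count(S) != 0;
      AnyReserved |= Reserved.count(S) != 0;
    }
    if (AnyIn && !AnyReserved)
      SRegs.insert(R);
  }
  // Step (5): drop any register whose super-register is already in the set.
  for (auto It = SRegs.begin(); It != SRegs.end();)
    It = SRegs.count(pairOf(*It)) ? SRegs.erase(It) : std::next(It);

  for (const auto &R : SRegs)
    printf("%s ", R.c_str());  // prints: D0 R2
  printf("\n");
}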
+ BitVector Reserved = TRI->getReservedRegs(MF); + for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) { + unsigned R = x; + for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR) + SRegs[*SR] = false; + } + DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI); dbgs() << "\n"); + DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + + // (3) Collect all registers that have at least one sub-register in SRegs, + // and also have no sub-registers that are reserved. These will be the can- + // didates for saving as a whole instead of their individual sub-registers. + // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.) + BitVector TmpSup(Hexagon::NUM_TARGET_REGS); + for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { + unsigned R = x; + for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) + TmpSup[*SR] = true; + } + for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) { + unsigned R = x; + for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) { + if (!Reserved[*SR]) + continue; + TmpSup[R] = false; + break; + } + } + DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI); dbgs() << "\n"); + + // (4) Include all super-registers found in (3) into SRegs. + SRegs |= TmpSup; + DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + + // (5) For each register R in SRegs, if any super-register of R is in SRegs, + // remove R from SRegs. + for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { + unsigned R = x; + for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) { + if (!SRegs[*SR]) + continue; + SRegs[R] = false; + break; + } + } + DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + + // Now, for each register that has a fixed stack slot, create the stack + // object for it. + CSI.clear(); + + typedef TargetFrameLowering::SpillSlot SpillSlot; + unsigned NumFixed; + int MinOffset = 0; // CS offsets are negative. + const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed); + for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) { + if (!SRegs[S->Reg]) + continue; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg); + int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), S->Offset); + MinOffset = std::min(MinOffset, S->Offset); + CSI.push_back(CalleeSavedInfo(S->Reg, FI)); + SRegs[S->Reg] = false; + } + + // There can be some registers that don't have fixed slots. For example, + // we need to store R0-R3 in functions with exception handling. For each + // such register, create a non-fixed stack object. + for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { + unsigned R = x; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R); + int Off = MinOffset - RC->getSize(); + unsigned Align = std::min(RC->getAlignment(), getStackAlignment()); + assert(isPowerOf2_32(Align)); + Off &= -Align; + int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), Off); + MinOffset = std::min(MinOffset, Off); + CSI.push_back(CalleeSavedInfo(R, FI)); + SRegs[R] = false; + } + + DEBUG({ + dbgs() << "CS information: {"; + for (unsigned i = 0, n = CSI.size(); i < n; ++i) { + int FI = CSI[i].getFrameIdx(); + int Off = MFI->getObjectOffset(FI); + dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp"; + if (Off >= 0) + dbgs() << '+'; + dbgs() << Off; } + dbgs() << " }\n"; + }); + +#ifndef NDEBUG + // Verify that all registers were handled. 
+ bool MissedReg = false; + for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { + unsigned R = x; + dbgs() << PrintReg(R, TRI) << ' '; + MissedReg = true; } + if (MissedReg) + llvm_unreachable("...there are unhandled callee-saved registers!"); +#endif + return true; } -void HexagonFrameLowering:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - MachineInstr &MI = *I; - if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) { - // Hexagon_TODO: add code - } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) { - // Hexagon_TODO: add code - } else { - llvm_unreachable("Cannot handle this call frame pseudo instruction"); +void HexagonFrameLowering::expandAlloca(MachineInstr *AI, + const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const { + MachineBasicBlock &MB = *AI->getParent(); + DebugLoc DL = AI->getDebugLoc(); + unsigned A = AI->getOperand(2).getImm(); + + // Have + // Rd = alloca Rs, #A + // + // If Rs and Rd are different registers, use this sequence: + // Rd = sub(r29, Rs) + // r29 = sub(r29, Rs) + // Rd = and(Rd, #-A) ; if necessary + // r29 = and(r29, #-A) ; if necessary + // Rd = add(Rd, #CF) ; CF size aligned to at most A + // otherwise, do + // Rd = sub(r29, Rs) + // Rd = and(Rd, #-A) ; if necessary + // r29 = Rd + // Rd = add(Rd, #CF) ; CF size aligned to at most A + + MachineOperand &RdOp = AI->getOperand(0); + MachineOperand &RsOp = AI->getOperand(1); + unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg(); + + // Rd = sub(r29, Rs) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd) + .addReg(SP) + .addReg(Rs); + if (Rs != Rd) { + // r29 = sub(r29, Rs) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP) + .addReg(SP) + .addReg(Rs); + } + if (A > 8) { + // Rd = and(Rd, #-A) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd) + .addReg(Rd) + .addImm(-int64_t(A)); + if (Rs != Rd) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP) + .addReg(SP) + .addImm(-int64_t(A)); + } + if (Rs == Rd) { + // r29 = Rd + BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP) + .addReg(Rd); + } + if (CF > 0) { + // Rd = add(Rd, #CF) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd) + .addReg(Rd) + .addImm(CF); } - MBB.erase(I); } -int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - return MF.getFrameInfo()->getObjectOffset(FI); + +bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + if (!MFI->hasVarSizedObjects()) + return false; + unsigned MaxA = MFI->getMaxAlignment(); + if (MaxA <= getStackAlignment()) + return false; + return true; +} + + +MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const { + for (auto &B : MF) + for (auto &I : B) + if (I.getOpcode() == Hexagon::ALIGNA) + return &I; + return nullptr; +} + + +inline static bool isOptSize(const MachineFunction &MF) { + AttributeSet AF = MF.getFunction()->getAttributes(); + return AF.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize); +} + +inline static bool isMinSize(const MachineFunction &MF) { + AttributeSet AF = MF.getFunction()->getAttributes(); + return AF.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); +} + + +/// Determine whether the callee-saved register saves and restores should +/// be generated via inline code. If this function returns "true", inline +/// code will be generated. 
If this function returns "false", additional +/// checks are performed, which may still lead to the inline code. +bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF, + const CSIVect &CSI) const { + if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn()) + return true; + if (!isOptSize(MF) && !isMinSize(MF)) + if (MF.getTarget().getOptLevel() > CodeGenOpt::Default) + return true; + + // Check if CSI only has double registers, and if the registers form + // a contiguous block starting from D8. + BitVector Regs(Hexagon::NUM_TARGET_REGS); + for (unsigned i = 0, n = CSI.size(); i < n; ++i) { + unsigned R = CSI[i].getReg(); + if (!Hexagon::DoubleRegsRegClass.contains(R)) + return true; + Regs[R] = true; + } + int F = Regs.find_first(); + if (F != Hexagon::D8) + return true; + while (F >= 0) { + int N = Regs.find_next(F); + if (N >= 0 && N != F+1) + return true; + F = N; + } + + return false; +} + + +bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF, + const CSIVect &CSI) const { + if (shouldInlineCSR(MF, CSI)) + return false; + unsigned NumCSI = CSI.size(); + if (NumCSI <= 1) + return false; + + unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs + : SpillFuncThreshold; + return Threshold < NumCSI; +} + + +bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF, + const CSIVect &CSI) const { + if (shouldInlineCSR(MF, CSI)) + return false; + unsigned NumCSI = CSI.size(); + unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1 + : SpillFuncThreshold; + return Threshold < NumCSI; } + diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index 2d6b45793809..89500cb85724 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -15,35 +15,88 @@ namespace llvm { -class HexagonFrameLowering : public TargetFrameLowering { -private: - void determineFrameLayout(MachineFunction &MF) const; +class HexagonInstrInfo; +class HexagonRegisterInfo; +class HexagonFrameLowering : public TargetFrameLowering { public: - explicit HexagonFrameLowering() : TargetFrameLowering(StackGrowsDown, 8, 0) {} + explicit HexagonFrameLowering() + : TargetFrameLowering(StackGrowsDown, 8, 0, 1, true) {} - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const override; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + // All of the prolog/epilog functionality, including saving and restoring + // callee-saved registers is handled in emitPrologue. This is to have the + // logic for shrink-wrapping in one place. 
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const + override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const + override {} bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const override; - - void - eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const override; - - bool - restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const override; + MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override { + return true; + } + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override { + return true; + } + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = nullptr) const override; + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const override; + + bool targetHandlesStackFrameRounding() const override { + return true; + } int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; bool hasFP(const MachineFunction &MF) const override; - bool hasTailCall(MachineBasicBlock &MBB) const; + + const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) + const override { + static const SpillSlot Offsets[] = { + { Hexagon::R17, -4 }, { Hexagon::R16, -8 }, { Hexagon::D8, -8 }, + { Hexagon::R19, -12 }, { Hexagon::R18, -16 }, { Hexagon::D9, -16 }, + { Hexagon::R21, -20 }, { Hexagon::R20, -24 }, { Hexagon::D10, -24 }, + { Hexagon::R23, -28 }, { Hexagon::R22, -32 }, { Hexagon::D11, -32 }, + { Hexagon::R25, -36 }, { Hexagon::R24, -40 }, { Hexagon::D12, -40 }, + { Hexagon::R27, -44 }, { Hexagon::R26, -48 }, { Hexagon::D13, -48 } + }; + NumEntries = array_lengthof(Offsets); + return Offsets; + } + + bool assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) + const override; + + bool needsAligna(const MachineFunction &MF) const; + MachineInstr *getAlignaInstr(MachineFunction &MF) const; + +private: + typedef std::vector<CalleeSavedInfo> CSIVect; + + void expandAlloca(MachineInstr *AI, const HexagonInstrInfo &TII, + unsigned SP, unsigned CF) const; + void insertPrologueInBlock(MachineBasicBlock &MBB) const; + void insertEpilogueInBlock(MachineBasicBlock &MBB) const; + bool insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, + const HexagonRegisterInfo &HRI) const; + bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, + const HexagonRegisterInfo &HRI) const; + + void adjustForCalleeSavedRegsSpillCall(MachineFunction &MF) const; + bool replacePredRegPseudoSpillCode(MachineFunction &MF) const; + bool replaceVecPredRegPseudoSpillCode(MachineFunction &MF) const; + + void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB, + MachineBasicBlock *&EpilogB) const; + + bool shouldInlineCSR(llvm::MachineFunction&, const CSIVect&) const; + bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const; + bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const; 
}; } // End llvm namespace diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 637b0a0d0bff..db72899388e5 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -21,14 +21,13 @@ // - Countable loops (w/ ind. var for a trip count) // - Assumes loops are normalized by IndVarSimplify // - Try inner-most loops first -// - No nested hardware loops. // - No function calls in loops. // //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallSet.h" #include "Hexagon.h" -#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -49,9 +48,18 @@ using namespace llvm; #define DEBUG_TYPE "hwloops" #ifndef NDEBUG -static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1)); +static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1)); + +// Option to create preheader only for a specific function. +static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden, + cl::init("")); #endif +// Option to create a preheader if one doesn't exist. +static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader", + cl::Hidden, cl::init(true), + cl::desc("Add a preheader to a hardware loop if one doesn't exist")); + STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); namespace llvm { @@ -64,9 +72,7 @@ namespace { MachineLoopInfo *MLI; MachineRegisterInfo *MRI; MachineDominatorTree *MDT; - const HexagonTargetMachine *TM; const HexagonInstrInfo *TII; - const HexagonRegisterInfo *TRI; #ifndef NDEBUG static int Counter; #endif @@ -89,14 +95,16 @@ namespace { } private: + typedef std::map<unsigned, MachineInstr *> LoopFeederMap; + /// Kinds of comparisons in the compare instructions. struct Comparison { enum Kind { EQ = 0x01, NE = 0x02, - L = 0x04, // Less-than property. - G = 0x08, // Greater-than property. - U = 0x40, // Unsigned property. + L = 0x04, + G = 0x08, + U = 0x40, LTs = L, LEs = L | EQ, GTs = G, @@ -113,6 +121,23 @@ namespace { return (Kind)(Cmp ^ (L|G)); return Cmp; } + + static Kind getNegatedComparison(Kind Cmp) { + if ((Cmp & L) || (Cmp & G)) + return (Kind)((Cmp ^ (L | G)) ^ EQ); + if ((Cmp & NE) || (Cmp & EQ)) + return (Kind)(Cmp ^ (EQ | NE)); + return (Kind)0; + } + + static bool isSigned(Kind Cmp) { + return (Cmp & (L | G) && !(Cmp & U)); + } + + static bool isUnsigned(Kind Cmp) { + return (Cmp & U); + } + }; /// \brief Find the register that contains the loop controlling @@ -130,6 +155,12 @@ namespace { bool findInductionRegister(MachineLoop *L, unsigned &Reg, int64_t &IVBump, MachineInstr *&IVOp) const; + /// \brief Return the comparison kind for the specified opcode. + Comparison::Kind getComparisonKind(unsigned CondOpc, + MachineOperand *InitialValue, + const MachineOperand *Endvalue, + int64_t IVBump) const; + /// \brief Analyze the statements in a loop to determine if the loop /// has a computable trip count and, if so, return a value that represents /// the trip count expression. @@ -143,24 +174,22 @@ namespace { /// If the trip count is not directly available (as an immediate value, /// or a register), the function will attempt to insert computation of it /// to the loop's preheader. 
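Before the computeCount changes below, one note on the Comparison::Kind encoding shown earlier: it makes swapping and negating comparisons pure bit manipulation. Swapping the operands flips the L and G bits, and negation flips L/G and toggles EQ. A standalone check of those identities, with the enum values copied from the snippet above (U marks unsigned compares and is not exercised here):

#include <cassert>

enum Kind { EQ = 0x01, NE = 0x02, L = 0x04, G = 0x08, U = 0x40,
            LTs = L, LEs = L | EQ, GTs = G, GEs = G | EQ };

Kind swapped(Kind C) {    // "a < b" becomes "b > a"
  if (C & (L | G))
    return Kind(C ^ (L | G));
  return C;
}

Kind negated(Kind C) {    // "!(a < b)" becomes "a >= b"
  if (C & (L | G))
    return Kind((C ^ (L | G)) ^ EQ);
  if (C & (EQ | NE))
    return Kind(C ^ (EQ | NE));
  return Kind(0);
}

int main() {
  assert(swapped(LTs) == GTs);
  assert(negated(LTs) == GEs);
  assert(negated(LEs) == GTs);
  assert(negated(EQ) == NE);
  return 0;
}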
- CountValue *computeCount(MachineLoop *Loop, - const MachineOperand *Start, - const MachineOperand *End, - unsigned IVReg, - int64_t IVBump, - Comparison::Kind Cmp) const; + CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start, + const MachineOperand *End, unsigned IVReg, + int64_t IVBump, Comparison::Kind Cmp) const; /// \brief Return true if the instruction is not valid within a hardware /// loop. - bool isInvalidLoopOperation(const MachineInstr *MI) const; + bool isInvalidLoopOperation(const MachineInstr *MI, + bool IsInnerHWLoop) const; /// \brief Return true if the loop contains an instruction that inhibits /// using the hardware loop. - bool containsInvalidInstruction(MachineLoop *L) const; + bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const; /// \brief Given a loop, check if we can convert it to a hardware loop. /// If so, then perform the conversion and return true. - bool convertToHardwareLoop(MachineLoop *L); + bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used); /// \brief Return true if the instruction is now dead. bool isDead(const MachineInstr *MI, @@ -175,14 +204,44 @@ namespace { /// defined. If the instructions are out of order, try to reorder them. bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); - /// \brief Get the instruction that loads an immediate value into \p R, - /// or 0 if such an instruction does not exist. - MachineInstr *defWithImmediate(unsigned R); + /// \brief Return true if MO and MI pair is visited only once. If visited + /// more than once, this indicates there is recursion. In such a case, + /// return false. + bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI, + const MachineOperand *MO, + LoopFeederMap &LoopFeederPhi) const; + + /// \brief Return true if the Phi may generate a value that may underflow, + /// or may wrap. + bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal, + MachineBasicBlock *MBB, MachineLoop *L, + LoopFeederMap &LoopFeederPhi) const; + + /// \brief Return true if the induction variable may underflow an unsigned + /// value in the first iteration. + bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal, + const MachineOperand *EndVal, + MachineBasicBlock *MBB, MachineLoop *L, + LoopFeederMap &LoopFeederPhi) const; + + /// \brief Check if the given operand has a compile-time known constant + /// value. Return true if yes, and false otherwise. When returning true, set + /// Val to the corresponding constant value. + bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const; + + /// \brief Check if the operand has a compile-time known constant value. + bool isImmediate(const MachineOperand &MO) const { + int64_t V; + return checkForImmediate(MO, V); + } - /// \brief Get the immediate value referenced to by \p MO, either for - /// immediate operands, or for register operands, where the register - /// was defined with an immediate value. - int64_t getImmediate(MachineOperand &MO); + /// \brief Return the immediate for the specified operand. + int64_t getImmediate(const MachineOperand &MO) const { + int64_t V; + if (!checkForImmediate(MO, V)) + llvm_unreachable("Invalid operand"); + return V; + } /// \brief Reset the given machine operand to now refer to a new immediate /// value. 
Assumes that the operand was already referencing an immediate @@ -265,9 +324,7 @@ namespace { return Contents.ImmVal; } - void print(raw_ostream &OS, const TargetMachine *TM = nullptr) const { - const TargetRegisterInfo *TRI = - TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr; + void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const { if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); } if (isImm()) { OS << Contents.ImmVal; } } @@ -282,18 +339,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", "Hexagon Hardware Loops", false, false) - -/// \brief Returns true if the instruction is a hardware loop instruction. -static bool isHardwareLoop(const MachineInstr *MI) { - return MI->getOpcode() == Hexagon::J2_loop0r || - MI->getOpcode() == Hexagon::J2_loop0i; -} - FunctionPass *llvm::createHexagonHardwareLoops() { return new HexagonHardwareLoops(); } - bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); @@ -302,22 +351,30 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { MLI = &getAnalysis<MachineLoopInfo>(); MRI = &MF.getRegInfo(); MDT = &getAnalysis<MachineDominatorTree>(); - TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget()); - TII = static_cast<const HexagonInstrInfo *>( - TM->getSubtargetImpl()->getInstrInfo()); - TRI = static_cast<const HexagonRegisterInfo *>( - TM->getSubtargetImpl()->getRegisterInfo()); + TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); - I != E; ++I) { - MachineLoop *L = *I; - if (!L->getParentLoop()) - Changed |= convertToHardwareLoop(L); - } + for (auto &L : *MLI) + if (!L->getParentLoop()) { + bool L0Used = false; + bool L1Used = false; + Changed |= convertToHardwareLoop(L, L0Used, L1Used); + } return Changed; } +/// \brief Return the latch block if it's one of the exiting blocks. Otherwise, +/// return the exiting block. Return 'null' when multiple exiting blocks are +/// present. +static MachineBasicBlock* getExitingBlock(MachineLoop *L) { + if (MachineBasicBlock *Latch = L->getLoopLatch()) { + if (L->isLoopExiting(Latch)) + return Latch; + else + return L->getExitingBlock(); + } + return nullptr; +} bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, unsigned &Reg, @@ -327,7 +384,8 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, MachineBasicBlock *Header = L->getHeader(); MachineBasicBlock *Preheader = L->getLoopPreheader(); MachineBasicBlock *Latch = L->getLoopLatch(); - if (!Header || !Preheader || !Latch) + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + if (!Header || !Preheader || !Latch || !ExitingBlock) return false; // This pair represents an induction register together with an immediate @@ -357,15 +415,16 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, unsigned PhiOpReg = Phi->getOperand(i).getReg(); MachineInstr *DI = MRI->getVRegDef(PhiOpReg); unsigned UpdOpc = DI->getOpcode(); - bool isAdd = (UpdOpc == Hexagon::ADD_ri); + bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp); if (isAdd) { - // If the register operand to the add is the PHI we're - // looking at, this meets the induction pattern. + // If the register operand to the add is the PHI we're looking at, this + // meets the induction pattern. 
        unsigned IndReg = DI->getOperand(1).getReg();
-       if (MRI->getVRegDef(IndReg) == Phi) {
+       MachineOperand &Opnd2 = DI->getOperand(2);
+       int64_t V;
+       if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
          unsigned UpdReg = DI->getOperand(0).getReg();
-         int64_t V = DI->getOperand(2).getImm();
          IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
        }
      }
@@ -374,13 +433,13 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
   SmallVector<MachineOperand,2> Cond;
   MachineBasicBlock *TB = nullptr, *FB = nullptr;
-  bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+  bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
   if (NotAnalyzed)
     return false;
-  unsigned CSz = Cond.size();
-  assert (CSz == 1 || CSz == 2);
-  unsigned PredR = Cond[CSz-1].getReg();
+  unsigned PredR, PredPos, PredRegFlags;
+  if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
+    return false;
   MachineInstr *PredI = MRI->getVRegDef(PredR);
   if (!PredI->isCompare())
     return false;
@@ -392,7 +451,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
                                    CmpMask, CmpImm);
   // Fail if the compare was not analyzed, or it's not comparing a register
   // with an immediate value. Not checking the mask here, since we handle
   // the individual compare opcodes (including A4_cmpb*) later on.
   if (!CmpAnalyzed)
     return false;
@@ -422,6 +481,44 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
   return true;
 }
+// Return the comparison kind for the specified opcode.
+HexagonHardwareLoops::Comparison::Kind
+HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
+                                        MachineOperand *InitialValue,
+                                        const MachineOperand *EndValue,
+                                        int64_t IVBump) const {
+  Comparison::Kind Cmp = (Comparison::Kind)0;
+  switch (CondOpc) {
+  case Hexagon::C2_cmpeqi:
+  case Hexagon::C2_cmpeq:
+  case Hexagon::C2_cmpeqp:
+    Cmp = Comparison::EQ;
+    break;
+  case Hexagon::C4_cmpneq:
+  case Hexagon::C4_cmpneqi:
+    Cmp = Comparison::NE;
+    break;
+  case Hexagon::C4_cmplte:
+    Cmp = Comparison::LEs;
+    break;
+  case Hexagon::C4_cmplteu:
+    Cmp = Comparison::LEu;
+    break;
+  case Hexagon::C2_cmpgtui:
+  case Hexagon::C2_cmpgtu:
+  case Hexagon::C2_cmpgtup:
+    Cmp = Comparison::GTu;
+    break;
+  case Hexagon::C2_cmpgti:
+  case Hexagon::C2_cmpgt:
+  case Hexagon::C2_cmpgtp:
+    Cmp = Comparison::GTs;
+    break;
+  default:
+    return (Comparison::Kind)0;
+  }
+  return Cmp;
+}
 /// \brief Analyze the statements in a loop to determine if the loop has
 /// a computable trip count and, if so, return a value that represents
 /// the trip count expression.
@@ -431,7 +528,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
 /// induction variable patterns that are used in the calculation for
 /// the number of times the loop is executed.
 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
-    SmallVectorImpl<MachineInstr *> &OldInsts) {
+                              SmallVectorImpl<MachineInstr *> &OldInsts) {
   MachineBasicBlock *TopMBB = L->getTopBlock();
   MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
   assert(PI != TopMBB->pred_end() &&
          "Loop must have more than one incoming edge!");
@@ -455,8 +552,8 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
   // Look for the cmp instruction to determine if we can get a useful trip
   // count. The trip count can be either a register or an immediate. The
   // location of the value depends upon the type (reg or imm).
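// --- Illustrative sketch (editor's addition, not part of this commit). A
// minimal, self-contained model of how the normalized comparison kind used
// by getComparisonKind above behaves. The flag encoding here is an
// assumption chosen for illustration, not the pass's actual enum: with
// EQ/L/G as membership bits of {=,<,>} over a total order, negating a
// comparison is just the complement (XOR with all three bits), and swapping
// the operands only exchanges L and G, which mirrors what getLoopTripCount
// later does when the exit branch is negated or the operands are reversed.
#include <cassert>
enum Kind : unsigned {
  EQ = 0x1, L = 0x2, G = 0x4, U = 0x8,  // equal / less / greater / unsigned
  NE = L | G,                           // a != b  <=>  a < b or a > b
  LEs = L | EQ, GTs = G, LEu = L | EQ | U, GTu = G | U
};
static unsigned negated(unsigned K) { return K ^ (EQ | L | G); }
static unsigned swapped(unsigned K) {
  unsigned Rest = K & ~(L | G);
  return Rest | ((K & L) ? G : 0u) | ((K & G) ? L : 0u);
}
int main() {
  assert(negated(GTs) == LEs);      // !(a > b)   ==  a <= b
  assert(negated(EQ) == NE);        // !(a == b)  ==  a != b
  assert(swapped(GTu) == (L | U));  //  a >u b    ==  b <u a
}
// --- End of sketch; the diff resumes below. ---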
- MachineBasicBlock *Latch = L->getLoopLatch(); - if (!Latch) + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + if (!ExitingBlock) return nullptr; unsigned IVReg = 0; @@ -470,6 +567,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, MachineOperand *InitialValue = nullptr; MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); + MachineBasicBlock *Latch = L->getLoopLatch(); for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); if (MBB == Preheader) @@ -482,7 +580,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, SmallVector<MachineOperand,2> Cond; MachineBasicBlock *TB = nullptr, *FB = nullptr; - bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); + bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); if (NotAnalyzed) return nullptr; @@ -490,7 +588,18 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, // TB must be non-null. If FB is also non-null, one of them must be // the header. Otherwise, branch to TB could be exiting the loop, and // the fall through can go to the header. - assert (TB && "Latch block without a branch?"); + assert (TB && "Exit block without a branch?"); + if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { + MachineBasicBlock *LTB = 0, *LFB = 0; + SmallVector<MachineOperand,2> LCond; + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false); + if (NotAnalyzed) + return nullptr; + if (TB == Latch) + TB = (LTB == Header) ? LTB : LFB; + else + FB = (LTB == Header) ? LTB: LFB; + } assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); if (!TB || (FB && TB != Header && FB != Header)) return nullptr; @@ -499,8 +608,10 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, // to put imm(0), followed by P in the vector Cond. // If TB is not the header, it means that the "not-taken" path must lead // to the header. - bool Negated = (Cond.size() > 1) ^ (TB != Header); - unsigned PredReg = Cond[Cond.size()-1].getReg(); + bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header); + unsigned PredReg, PredPos, PredRegFlags; + if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags)) + return nullptr; MachineInstr *CondI = MRI->getVRegDef(PredReg); unsigned CondOpc = CondI->getOpcode(); @@ -539,57 +650,13 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, if (!EndValue) return nullptr; - switch (CondOpc) { - case Hexagon::C2_cmpeqi: - case Hexagon::C2_cmpeq: - Cmp = !Negated ? Comparison::EQ : Comparison::NE; - break; - case Hexagon::C2_cmpgtui: - case Hexagon::C2_cmpgtu: - Cmp = !Negated ? Comparison::GTu : Comparison::LEu; - break; - case Hexagon::C2_cmpgti: - case Hexagon::C2_cmpgt: - Cmp = !Negated ? Comparison::GTs : Comparison::LEs; - break; - // Very limited support for byte/halfword compares. - case Hexagon::CMPbEQri_V4: - case Hexagon::CMPhEQri_V4: { - if (IVBump != 1) - return nullptr; - - int64_t InitV, EndV; - // Since the comparisons are "ri", the EndValue should be an - // immediate. Check it just in case. - assert(EndValue->isImm() && "Unrecognized latch comparison"); - EndV = EndValue->getImm(); - // Allow InitialValue to be a register defined with an immediate. 
-    if (InitialValue->isReg()) {
-      if (!defWithImmediate(InitialValue->getReg()))
-        return nullptr;
-      InitV = getImmediate(*InitialValue);
-    } else {
-      assert(InitialValue->isImm());
-      InitV = InitialValue->getImm();
-    }
-    if (InitV >= EndV)
-      return nullptr;
-    if (CondOpc == Hexagon::CMPbEQri_V4) {
-      if (!isInt<8>(InitV) || !isInt<8>(EndV))
-        return nullptr;
-    } else { // Hexagon::CMPhEQri_V4
-      if (!isInt<16>(InitV) || !isInt<16>(EndV))
-        return nullptr;
-    }
-    Cmp = !Negated ? Comparison::EQ : Comparison::NE;
-    break;
-  }
-  default:
-    return nullptr;
-  }
-
+  Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
+  if (!Cmp)
+    return nullptr;
+  if (Negated)
+    Cmp = Comparison::getNegatedComparison(Cmp);
   if (isSwapped)
-   Cmp = Comparison::getSwappedComparison(Cmp);
+    Cmp = Comparison::getSwappedComparison(Cmp);
   if (InitialValue->isReg()) {
     unsigned R = InitialValue->getReg();
@@ -603,6 +670,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
     MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
     if (!MDT->properlyDominates(DefBB, Header))
       return nullptr;
+    OldInsts.push_back(MRI->getVRegDef(R));
   }
   return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
@@ -626,32 +694,45 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
   // If so, use the immediate value rather than the register.
   if (Start->isReg()) {
     const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
-    if (StartValInstr && StartValInstr->getOpcode() == Hexagon::A2_tfrsi)
+    if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
+                          StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
      Start = &StartValInstr->getOperand(1);
   }
   if (End->isReg()) {
     const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
-    if (EndValInstr && EndValInstr->getOpcode() == Hexagon::A2_tfrsi)
+    if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
+                        EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
       End = &EndValInstr->getOperand(1);
   }
-  assert (Start->isReg() || Start->isImm());
-  assert (End->isReg() || End->isImm());
+  if (!Start->isReg() && !Start->isImm())
+    return nullptr;
+  if (!End->isReg() && !End->isImm())
+    return nullptr;
   bool CmpLess = Cmp & Comparison::L;
   bool CmpGreater = Cmp & Comparison::G;
   bool CmpHasEqual = Cmp & Comparison::EQ;
   // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
-  // If loop executes while iv is "less" with the iv value going down, then
-  // the iv must wrap.
   if (CmpLess && IVBump < 0)
+    // Loop going while iv is "less" with the iv value going down. Must wrap.
     return nullptr;
-  // If loop executes while iv is "greater" with the iv value going up, then
-  // the iv must wrap.
+
+  if (CmpGreater && IVBump > 0)
+    // Loop going while iv is "greater" with the iv value going up. Must wrap.
    return nullptr;
+
+  // Phis that may feed into the loop.
+  LoopFeederMap LoopFeederPhi;
+
+  // Check if the initial value may be zero and can be decremented in the first
+  // iteration. If the value is zero, the endloop instruction will not decrement
+  // the loop counter, so we shouldn't generate a hardware loop in this case.
+  if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
+                                  LoopFeederPhi))
+    return nullptr;
+
   if (Start->isImm() && End->isImm()) {
     // Both, start and end are immediates.
     int64_t StartV = Start->getImm();
@@ -674,14 +755,16 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
   if (CmpHasEqual)
     Dist = Dist > 0 ?
Dist+1 : Dist-1;
-  // assert (CmpLess => Dist > 0);
-  assert ((!CmpLess || Dist > 0) && "Loop should never iterate!");
-  // assert (CmpGreater => Dist < 0);
-  assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!");
+  // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
+  // CmpGreater should imply Dist < 0. These conditions could actually
+  // fail, for example, in unreachable code (which may still appear to be
+  // reachable in the CFG).
+  if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
+    return nullptr;
   // "Normalized" distance, i.e. with the bump set to +-1.
-  int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump
-                               : (-Dist + (-IVBump-1)) / (-IVBump);
+  int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
+                               : (-Dist + (-IVBump - 1)) / (-IVBump);
   assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
   uint64_t Count = Dist1;
@@ -698,14 +781,15 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
   // If the induction variable bump is not a power of 2, quit.
   // Otherwise we'd need a general integer division.
-  if (!isPowerOf2_64(abs64(IVBump)))
+  if (!isPowerOf2_64(std::abs(IVBump)))
     return nullptr;
   MachineBasicBlock *PH = Loop->getLoopPreheader();
   assert (PH && "Should have a preheader by now");
   MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
-  DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc()
-                                         : DebugLoc();
+  DebugLoc DL;
+  if (InsertPos != PH->end())
+    DL = InsertPos->getDebugLoc();
   // If Start is an immediate and End is a register, the trip count
   // will be "reg - imm". Hexagon's "subtract immediate" instruction
@@ -782,23 +866,37 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
     DistSR = End->getSubReg();
   } else {
     const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
-                              (RegToImm ? TII->get(Hexagon::SUB_ri) :
-                                          TII->get(Hexagon::ADD_ri));
-    unsigned SubR = MRI->createVirtualRegister(IntRC);
-    MachineInstrBuilder SubIB =
-      BuildMI(*PH, InsertPos, DL, SubD, SubR);
-
-    if (RegToReg) {
-      SubIB.addReg(End->getReg(), 0, End->getSubReg())
-           .addReg(Start->getReg(), 0, Start->getSubReg());
-    } else if (RegToImm) {
-      SubIB.addImm(EndV)
-           .addReg(Start->getReg(), 0, Start->getSubReg());
-    } else { // ImmToReg
-      SubIB.addReg(End->getReg(), 0, End->getSubReg())
-           .addImm(-StartV);
+                              (RegToImm ? TII->get(Hexagon::A2_subri) :
+                                          TII->get(Hexagon::A2_addi));
+    if (RegToReg || RegToImm) {
+      unsigned SubR = MRI->createVirtualRegister(IntRC);
+      MachineInstrBuilder SubIB =
+        BuildMI(*PH, InsertPos, DL, SubD, SubR);
+
+      if (RegToReg)
+        SubIB.addReg(End->getReg(), 0, End->getSubReg())
+             .addReg(Start->getReg(), 0, Start->getSubReg());
+      else
+        SubIB.addImm(EndV)
+             .addReg(Start->getReg(), 0, Start->getSubReg());
+      DistR = SubR;
+    } else {
+      // If the loop has been unrolled, we should use the original loop count
+      // instead of recalculating the value. This will avoid an additional
+      // 'Add' instruction.
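// --- Illustrative sketch (editor's addition, not part of this commit). A
// worked example of the "normalized" distance computed above: for a loop
// "for (i = 0; i < 10; i += 3)" we get Dist = 10 and IVBump = 3, and the
// rounding-up division yields (10 + 2) / 3 = 4 iterations (i = 0, 3, 6, 9).
// The same ceiling division, standalone; the unrolled-loop special case of
// the diff resumes right after this sketch.
#include <cassert>
#include <cstdint>
static int64_t normalizedTripCount(int64_t Dist, int64_t IVBump) {
  // Mirrors the Dist1 computation: ceiling division by |IVBump|.
  return IVBump > 0 ? (Dist + (IVBump - 1)) / IVBump
                    : (-Dist + (-IVBump - 1)) / (-IVBump);
}
int main() {
  assert(normalizedTripCount(10, 3) == 4);    // counting up by 3
  assert(normalizedTripCount(-10, -3) == 4);  // counting down by 3
}
// --- End of sketch; the diff resumes below. ---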
+ const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); + if (EndValInstr->getOpcode() == Hexagon::A2_addi && + EndValInstr->getOperand(2).getImm() == StartV) { + DistR = EndValInstr->getOperand(1).getReg(); + } else { + unsigned SubR = MRI->createVirtualRegister(IntRC); + MachineInstrBuilder SubIB = + BuildMI(*PH, InsertPos, DL, SubD, SubR); + SubIB.addReg(End->getReg(), 0, End->getSubReg()) + .addImm(-StartV); + DistR = SubR; + } } - DistR = SubR; DistSR = 0; } @@ -811,7 +909,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, } else { // Generate CountR = ADD DistR, AdjVal unsigned AddR = MRI->createVirtualRegister(IntRC); - const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri); + MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi); BuildMI(*PH, InsertPos, DL, AddD, AddR) .addReg(DistR, 0, DistSR) .addImm(AdjV); @@ -844,50 +942,50 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, return new CountValue(CountValue::CV_Register, CountR, CountSR); } - /// \brief Return true if the operation is invalid within hardware loop. -bool HexagonHardwareLoops::isInvalidLoopOperation( - const MachineInstr *MI) const { +bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, + bool IsInnerHWLoop) const { - // call is not allowed because the callee may use a hardware loop - if (MI->getDesc().isCall()) + // Call is not allowed because the callee may use a hardware loop except for + // the case when the call never returns. + if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr) return true; - // do not allow nested hardware loops - if (isHardwareLoop(MI)) - return true; - - // check if the instruction defines a hardware loop register + // Check if the instruction defines a hardware loop register. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned R = MO.getReg(); - if (R == Hexagon::LC0 || R == Hexagon::LC1 || - R == Hexagon::SA0 || R == Hexagon::SA1) + if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 || + R == Hexagon::LC1 || R == Hexagon::SA1)) + return true; + if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1)) return true; } return false; } - -/// \brief - Return true if the loop contains an instruction that inhibits -/// the use of the hardware loop function. -bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const { +/// \brief Return true if the loop contains an instruction that inhibits +/// the use of the hardware loop instruction. +bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, + bool IsInnerHWLoop) const { const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); + DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber();); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *MBB = Blocks[i]; for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { const MachineInstr *MI = &*MII; - if (isInvalidLoopOperation(MI)) + if (isInvalidLoopOperation(MI, IsInnerHWLoop)) { + DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump();); return true; + } } } return false; } - /// \brief Returns true if the instruction is dead. 
This was essentially /// copied from DeadMachineInstructionElim::isDead, but with special cases /// for inline asm, physical registers and instructions with side effects @@ -928,7 +1026,7 @@ bool HexagonHardwareLoops::isDead(const MachineInstr *MI, MachineOperand &Use = *J; MachineInstr *UseMI = Use.getParent(); - // If the phi node has a user that is not MI, bail... + // If the phi node has a user that is not MI, bail. if (MI != UseMI) return false; } @@ -965,8 +1063,6 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { continue; if (Use.isDebug()) UseMI->getOperand(0).setReg(0U); - // This may also be a "instr -> phi -> instr" case which can - // be removed too. } } @@ -984,19 +1080,47 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { /// /// The code makes several assumptions about the representation of the loop /// in llvm. -bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { +bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, + bool &RecL0used, + bool &RecL1used) { // This is just for sanity. assert(L->getHeader() && "Loop without a header?"); bool Changed = false; + bool L0Used = false; + bool L1Used = false; + // Process nested loops first. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) - Changed |= convertToHardwareLoop(*I); + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used); + L0Used |= RecL0used; + L1Used |= RecL1used; + } // If a nested loop has been converted, then we can't convert this loop. - if (Changed) + if (Changed && L0Used && L1Used) return Changed; + unsigned LOOP_i; + unsigned LOOP_r; + unsigned ENDLOOP; + + // Flag used to track loopN instruction: + // 1 - Hardware loop is being generated for the inner most loop. + // 0 - Hardware loop is being generated for the outer loop. + unsigned IsInnerHWLoop = 1; + + if (L0Used) { + LOOP_i = Hexagon::J2_loop1i; + LOOP_r = Hexagon::J2_loop1r; + ENDLOOP = Hexagon::ENDLOOP1; + IsInnerHWLoop = 0; + } else { + LOOP_i = Hexagon::J2_loop0i; + LOOP_r = Hexagon::J2_loop0r; + ENDLOOP = Hexagon::ENDLOOP0; + } + #ifndef NDEBUG // Stop trying after reaching the limit (if any). int Limit = HWLoopLimit; @@ -1008,14 +1132,10 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { #endif // Does the loop contain any invalid instructions? - if (containsInvalidInstruction(L)) + if (containsInvalidInstruction(L, IsInnerHWLoop)) return false; - // Is the induction variable bump feeding the latch condition? - if (!fixupInductionVariable(L)) - return false; - - MachineBasicBlock *LastMBB = L->getExitingBlock(); + MachineBasicBlock *LastMBB = getExitingBlock(L); // Don't generate hw loop if the loop has more than one exit. if (!LastMBB) return false; @@ -1024,16 +1144,19 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { if (LastI == LastMBB->end()) return false; + // Is the induction variable bump feeding the latch condition? + if (!fixupInductionVariable(L)) + return false; + // Ensure the loop has a preheader: the loop instruction will be // placed there. 
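// --- Illustrative sketch (editor's addition, not part of this commit).
// Hexagon provides two hardware-loop register pairs, LC0/SA0 and LC1/SA1,
// so only two nesting levels can become hardware loops. The LOOP_i/LOOP_r/
// ENDLOOP opcode choice in convertToHardwareLoop above boils down to: if
// the inner loop already claimed loop0, the enclosing loop must take loop1.
// A stripped-down model, with strings standing in for the opcode enums:
#include <cassert>
#include <string>
struct LoopOpcodes { std::string LoopI, LoopR, EndLoop; };
static LoopOpcodes pickLoopOpcodes(bool L0Used) {
  if (L0Used)  // the inner loop took loop0, so use the loop1 registers
    return {"J2_loop1i", "J2_loop1r", "ENDLOOP1"};
  return {"J2_loop0i", "J2_loop0r", "ENDLOOP0"};
}
int main() {
  assert(pickLoopOpcodes(false).EndLoop == "ENDLOOP0");  // innermost loop
  assert(pickLoopOpcodes(true).EndLoop == "ENDLOOP1");   // enclosing loop
}
// --- End of sketch; the diff resumes below. ---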
- bool NewPreheader = false; MachineBasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { Preheader = createPreheaderForLoop(L); if (!Preheader) return false; - NewPreheader = true; } + MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); SmallVector<MachineInstr*, 2> OldInsts; @@ -1048,31 +1171,30 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { // so make sure that the register is actually defined at that point. MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); MachineBasicBlock *BBDef = TCDef->getParent(); - if (!NewPreheader) { - if (!MDT->dominates(BBDef, Preheader)) - return false; - } else { - // If we have just created a preheader, the dominator tree won't be - // aware of it. Check if the definition of the register dominates - // the header, but is not the header itself. - if (!MDT->properlyDominates(BBDef, L->getHeader())) - return false; - } + if (!MDT->dominates(BBDef, Preheader)) + return false; } // Determine the loop start. - MachineBasicBlock *LoopStart = L->getTopBlock(); - if (L->getLoopLatch() != LastMBB) { - // When the exit and latch are not the same, use the latch block as the - // start. - // The loop start address is used only after the 1st iteration, and the - // loop latch may contains instrs. that need to be executed after the - // first iteration. - LoopStart = L->getLoopLatch(); - // Make sure the latch is a successor of the exit, otherwise it won't work. - if (!LastMBB->isSuccessor(LoopStart)) + MachineBasicBlock *TopBlock = L->getTopBlock(); + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + MachineBasicBlock *LoopStart = 0; + if (ExitingBlock != L->getLoopLatch()) { + MachineBasicBlock *TB = 0, *FB = 0; + SmallVector<MachineOperand, 2> Cond; + + if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false)) + return false; + + if (L->contains(TB)) + LoopStart = TB; + else if (L->contains(FB)) + LoopStart = FB; + else return false; } + else + LoopStart = TopBlock; // Convert the loop to a hardware loop. DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); @@ -1086,8 +1208,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); // Add the Loop instruction to the beginning of the loop. - BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r)) - .addMBB(LoopStart) + BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart) .addReg(CountReg); } else { assert(TripCount->isImm() && "Expecting immediate value for trip count"); @@ -1095,14 +1216,14 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { // if the immediate fits in the instructions. Otherwise, we need to // create a new virtual register. 
    int64_t CountImm = TripCount->getImm();
-    if (!TII->isValidOffset(Hexagon::J2_loop0i, CountImm)) {
+    if (!TII->isValidOffset(LOOP_i, CountImm)) {
      unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
        .addImm(CountImm);
-      BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
+      BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
        .addMBB(LoopStart).addReg(CountReg);
    } else
-      BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0i))
+      BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
        .addMBB(LoopStart).addImm(CountImm);
  }
@@ -1116,8 +1237,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
   // Replace the loop branch with an endloop instruction.
   DebugLoc LastIDL = LastI->getDebugLoc();
-  BuildMI(*LastMBB, LastI, LastIDL,
-          TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+  BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
   // The loop ends with either:
   //  - a conditional branch followed by an unconditional branch, or
@@ -1145,10 +1265,18 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
     removeIfDead(OldInsts[i]);
   ++NumHWLoops;
+
+  // Set RecL1used and RecL0used only after the hardware loop has been
+  // successfully generated. Doing it earlier can cause the wrong loop
+  // instruction to be used.
+  if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
+    RecL1used = true;
+  else
+    RecL0used = true;
+
   return true;
 }
-
 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
                                             MachineInstr *CmpI) {
   assert (BumpI != CmpI && "Bump and compare in the same instruction?");
@@ -1189,35 +1317,226 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
   return FoundBump;
 }
+/// This function is required to break recursion. Visiting phis in a loop may
+/// result in recursion during compilation. We break the recursion by making
+/// sure that we visit a MachineOperand and its definition in a
+/// MachineInstruction only once. If we attempt to visit more than once, then
+/// there is recursion, and we will return false.
+bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
+                                        MachineInstr *MI,
+                                        const MachineOperand *MO,
+                                        LoopFeederMap &LoopFeederPhi) const {
+  if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
+    const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
+    DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
+    // Ignore all BBs that form Loop.
+    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+      MachineBasicBlock *MBB = Blocks[i];
+      if (A == MBB)
+        return false;
+    }
+    MachineInstr *Def = MRI->getVRegDef(MO->getReg());
+    LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
+    return true;
+  } else
+    // Already visited node.
+    return false;
+}
+
+/// Return true if a Phi may generate a value that can underflow.
+/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
+bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
+    MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
+    MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
+  assert(Phi->isPHI() && "Expecting a Phi.");
+  // Walk through each Phi, and its used operands. Make sure that
+  // if there is recursion in Phi, we won't generate hardware loops.
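// --- Illustrative sketch (editor's addition, not part of this commit).
// The LoopFeederMap above acts as a visited set: a register is recorded the
// first time its defining instruction is examined, and seeing the same
// register again means the phi web is cyclic, so the walk stops instead of
// recursing forever. The pattern, reduced to its essentials:
#include <cassert>
#include <map>
static bool visitOnce(std::map<unsigned, const void *> &Visited,
                      unsigned Reg, const void *Def) {
  // insert(...).second is false when Reg was already present.
  return Visited.insert({Reg, Def}).second;
}
int main() {
  std::map<unsigned, const void *> Visited;
  int Dummy = 0;
  assert(visitOnce(Visited, 5, &Dummy));   // first visit: keep walking
  assert(!visitOnce(Visited, 5, &Dummy));  // revisit: cycle, stop
}
// --- End of sketch; the diff resumes below. ---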
+  for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
+    if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
+      if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
+                                      Phi->getParent(), L, LoopFeederPhi))
+        return true;
+  return false;
+}
+
+/// Return true if the induction variable can underflow in the first iteration.
+/// An example is an initial unsigned value that is 0 and is decremented in the
+/// first iteration of a do-while loop. In this case, we cannot generate a
+/// hardware loop because the endloop instruction does not decrement the loop
+/// counter if it is <= 1. We only need to perform this analysis if the
+/// initial value is a register.
+///
+/// This function assumes the initial value may underflow unless proven
+/// otherwise. If the type is signed, then we don't care because signed
+/// underflow is undefined. We attempt to prove the initial value is not
+/// zero by performing a crude analysis of the loop counter. This function
+/// checks if the initial value is used in any comparison prior to the loop
+/// and, if so, assumes the comparison is a range check. This is inexact,
+/// but will catch the simple cases.
+bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
+    const MachineOperand *InitVal, const MachineOperand *EndVal,
+    MachineBasicBlock *MBB, MachineLoop *L,
+    LoopFeederMap &LoopFeederPhi) const {
+  // Only check register values since they are unknown.
+  if (!InitVal->isReg())
+    return false;
+
+  if (!EndVal->isImm())
+    return false;
+
+  // A register value that is assigned an immediate is a known value, and it
+  // won't underflow in the first iteration.
+  int64_t Imm;
+  if (checkForImmediate(*InitVal, Imm))
+    return (EndVal->getImm() == Imm);
+
+  unsigned Reg = InitVal->getReg();
+
+  // We don't know the value of a physical register.
+  if (!TargetRegisterInfo::isVirtualRegister(Reg))
+    return true;
+
+  MachineInstr *Def = MRI->getVRegDef(Reg);
+  if (!Def)
+    return true;
-MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) {
+  // If the initial value is a Phi or copy and the operands may not underflow,
+  // then the definition cannot underflow either.
+  if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
+                                             L, LoopFeederPhi))
+    return false;
+  if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
+                                                    EndVal, Def->getParent(),
+                                                    L, LoopFeederPhi))
+    return false;
+
+  // Iterate over the uses of the initial value. If the initial value is used
+  // in a compare, then we assume this is a range check that ensures the loop
+  // doesn't underflow. This is not an exact test and should be improved.
+  for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
+         E = MRI->use_instr_nodbg_end(); I != E; ++I) {
+    MachineInstr *MI = &*I;
+    unsigned CmpReg1 = 0, CmpReg2 = 0;
+    int CmpMask = 0, CmpValue = 0;
+
+    if (!TII->analyzeCompare(MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
+      continue;
+
+    MachineBasicBlock *TBB = 0, *FBB = 0;
+    SmallVector<MachineOperand, 2> Cond;
+    if (TII->AnalyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
+      continue;
+
+    Comparison::Kind Cmp = getComparisonKind(MI->getOpcode(), 0, 0, 0);
+    if (Cmp == 0)
+      continue;
+    if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
+      Cmp = Comparison::getNegatedComparison(Cmp);
+    if (CmpReg2 != 0 && CmpReg2 == Reg)
+      Cmp = Comparison::getSwappedComparison(Cmp);
+
+    // Signed underflow is undefined.
+    if (Comparison::isSigned(Cmp))
+      return false;
+
+    // Check if there is a comparison of the initial value. If the initial value
+    // is greater than or not equal to another value, then assume this is a
+    // range check.
+    if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
+      return false;
+  }
+
+  // OK - this is a hack that needs to be improved. We really need to analyze
+  // the instructions performed on the initial value. This works on the simplest
+  // cases only.
+  if (!Def->isCopy() && !Def->isPHI())
+    return false;
+
+  return true;
+}
+
+bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
+                                             int64_t &Val) const {
+  if (MO.isImm()) {
+    Val = MO.getImm();
+    return true;
+  }
+  if (!MO.isReg())
+    return false;
+
+  // MO is a register. Check whether it is defined as an immediate value,
+  // and if so, get the value of it in TV. That value will then need to be
+  // processed to handle potential subregisters in MO.
+  int64_t TV;
+
+  unsigned R = MO.getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(R))
+    return false;
   MachineInstr *DI = MRI->getVRegDef(R);
   unsigned DOpc = DI->getOpcode();
   switch (DOpc) {
+    case TargetOpcode::COPY:
     case Hexagon::A2_tfrsi:
     case Hexagon::A2_tfrpi:
     case Hexagon::CONST32_Int_Real:
-    case Hexagon::CONST64_Int_Real:
-      return DI;
-  }
-  return nullptr;
-}
+    case Hexagon::CONST64_Int_Real: {
+      // Call recursively to avoid an extra check whether operand(1) is
+      // indeed an immediate (it could be a global address, for example),
+      // plus we can handle COPY at the same time.
+      if (!checkForImmediate(DI->getOperand(1), TV))
+        return false;
+      break;
+    }
+    case Hexagon::A2_combineii:
+    case Hexagon::A4_combineir:
+    case Hexagon::A4_combineii:
+    case Hexagon::A4_combineri:
+    case Hexagon::A2_combinew: {
+      const MachineOperand &S1 = DI->getOperand(1);
+      const MachineOperand &S2 = DI->getOperand(2);
+      int64_t V1, V2;
+      if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
+        return false;
+      TV = V2 | (V1 << 32);
+      break;
+    }
+    case TargetOpcode::REG_SEQUENCE: {
+      const MachineOperand &S1 = DI->getOperand(1);
+      const MachineOperand &S3 = DI->getOperand(3);
+      int64_t V1, V3;
+      if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
+        return false;
+      unsigned Sub2 = DI->getOperand(2).getImm();
+      unsigned Sub4 = DI->getOperand(4).getImm();
+      if (Sub2 == Hexagon::subreg_loreg && Sub4 == Hexagon::subreg_hireg)
+        TV = V1 | (V3 << 32);
+      else if (Sub2 == Hexagon::subreg_hireg && Sub4 == Hexagon::subreg_loreg)
+        TV = V3 | (V1 << 32);
+      else
+        llvm_unreachable("Unexpected form of REG_SEQUENCE");
+      break;
+    }
+    default:
+      return false;
+  }
-int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) {
-  if (MO.isImm())
-    return MO.getImm();
-  assert(MO.isReg());
-  unsigned R = MO.getReg();
-  MachineInstr *DI = defWithImmediate(R);
-  assert(DI && "Need an immediate operand");
-  // All currently supported "define-with-immediate" instructions have the
-  // actual immediate value in the operand(1).
-  int64_t v = DI->getOperand(1).getImm();
-  return v;
+  // By now, we should have successfully obtained the immediate value defining
+  // the register referenced in MO. Handle a potential use of a subregister.
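// --- Illustrative sketch (editor's addition, not part of this commit).
// Once the defining instruction's 64-bit value TV is known, a use through a
// subregister selects one 32-bit half, exactly as the switch below does.
// Standalone, with illustrative stand-in names for the subregister indices:
#include <cassert>
#include <cstdint>
enum SubReg { NoSubReg, LoReg, HiReg };
static int64_t applySubReg(int64_t TV, SubReg S) {
  if (S == LoReg) return TV & 0xFFFFFFFFLL;          // low 32 bits
  if (S == HiReg) return (TV >> 32) & 0xFFFFFFFFLL;  // high 32 bits
  return TV;                                         // full 64-bit value
}
int main() {
  const int64_t TV = 0x0000000100000002LL;  // hi half = 1, lo half = 2
  assert(applySubReg(TV, LoReg) == 2);
  assert(applySubReg(TV, HiReg) == 1);
}
// --- End of sketch; the diff resumes below. ---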
+ switch (MO.getSubReg()) { + case Hexagon::subreg_loreg: + Val = TV & 0xFFFFFFFFULL; + break; + case Hexagon::subreg_hireg: + Val = (TV >> 32) & 0xFFFFFFFFULL; + break; + default: + Val = TV; + break; + } + return true; } - void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { if (MO.isImm()) { MO.setImm(Val); @@ -1226,30 +1545,32 @@ void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { assert(MO.isReg()); unsigned R = MO.getReg(); - MachineInstr *DI = defWithImmediate(R); - if (MRI->hasOneNonDBGUse(R)) { - // If R has only one use, then just change its defining instruction to - // the new immediate value. - DI->getOperand(1).setImm(Val); - return; - } + MachineInstr *DI = MRI->getVRegDef(R); const TargetRegisterClass *RC = MRI->getRegClass(R); unsigned NewR = MRI->createVirtualRegister(RC); MachineBasicBlock &B = *DI->getParent(); DebugLoc DL = DI->getDebugLoc(); - BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR) - .addImm(Val); + BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val); MO.setReg(NewR); } +static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) { + // These two instructions are not extendable. + if (CmpOpc == Hexagon::A4_cmpbeqi) + return isUInt<8>(Imm); + if (CmpOpc == Hexagon::A4_cmpbgti) + return isInt<8>(Imm); + // The rest of the comparison-with-immediate instructions are extendable. + return true; +} bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { MachineBasicBlock *Header = L->getHeader(); - MachineBasicBlock *Preheader = L->getLoopPreheader(); MachineBasicBlock *Latch = L->getLoopLatch(); + MachineBasicBlock *ExitingBlock = getExitingBlock(L); - if (!Header || !Preheader || !Latch) + if (!(Header && Latch && ExitingBlock)) return false; // These data structures follow the same concept as the corresponding @@ -1277,15 +1598,16 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { unsigned PhiReg = Phi->getOperand(i).getReg(); MachineInstr *DI = MRI->getVRegDef(PhiReg); unsigned UpdOpc = DI->getOpcode(); - bool isAdd = (UpdOpc == Hexagon::ADD_ri); + bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp); if (isAdd) { // If the register operand to the add/sub is the PHI we are looking // at, this meets the induction pattern. unsigned IndReg = DI->getOperand(1).getReg(); - if (MRI->getVRegDef(IndReg) == Phi) { + MachineOperand &Opnd2 = DI->getOperand(2); + int64_t V; + if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { unsigned UpdReg = DI->getOperand(0).getReg(); - int64_t V = DI->getOperand(2).getImm(); IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); } } @@ -1298,17 +1620,38 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { MachineBasicBlock *TB = nullptr, *FB = nullptr; SmallVector<MachineOperand,2> Cond; // AnalyzeBranch returns true if it fails to analyze branch. - bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); - if (NotAnalyzed) + bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); + if (NotAnalyzed || Cond.empty()) return false; - // Check if the latch branch is unconditional. - if (Cond.empty()) - return false; + if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { + MachineBasicBlock *LTB = 0, *LFB = 0; + SmallVector<MachineOperand,2> LCond; + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false); + if (NotAnalyzed) + return false; - if (TB != Header && FB != Header) - // The latch does not go back to the header. 
Not a latch we know and love. - return false; + // Since latch is not the exiting block, the latch branch should be an + // unconditional branch to the loop header. + if (TB == Latch) + TB = (LTB == Header) ? LTB : LFB; + else + FB = (LTB == Header) ? LTB : LFB; + } + if (TB != Header) { + if (FB != Header) { + // The latch/exit block does not go back to the header. + return false; + } + // FB is the header (i.e., uncond. jump to branch header) + // In this case, the LoopBody -> TB should not be a back edge otherwise + // it could result in an infinite loop after conversion to hw_loop. + // This case can happen when the Latch has two jumps like this: + // Jmp_c OuterLoopHeader <-- TB + // Jmp InnerLoopHeader <-- FB + if (MDT->dominates(TB, FB)) + return false; + } // Expecting a predicate register as a condition. It won't be a hardware // predicate register at this point yet, just a vreg. @@ -1319,6 +1662,9 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { if (CSz != 1 && CSz != 2) return false; + if (!Cond[CSz-1].isReg()) + return false; + unsigned P = Cond[CSz-1].getReg(); MachineInstr *PredDef = MRI->getVRegDef(P); @@ -1340,8 +1686,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { if (MO.isImplicit()) continue; if (MO.isUse()) { - unsigned R = MO.getReg(); - if (!defWithImmediate(R)) { + if (!isImmediate(MO)) { CmpRegs.insert(MO.getReg()); continue; } @@ -1374,20 +1719,70 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { // compared against an immediate, we can fix it. const RegisterBump &RB = I->second; if (CmpRegs.count(RB.first)) { - if (!CmpImmOp) + if (!CmpImmOp) { + // If both operands to the compare instruction are registers, see if + // it can be changed to use induction register as one of the operands. + MachineInstr *IndI = nullptr; + MachineInstr *nonIndI = nullptr; + MachineOperand *IndMO = nullptr; + MachineOperand *nonIndMO = nullptr; + + for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg() && MO.getReg() == RB.first) { + DEBUG(dbgs() << "\n DefMI(" << i << ") = " + << *(MRI->getVRegDef(I->first))); + if (IndI) + return false; + + IndI = MRI->getVRegDef(I->first); + IndMO = &MO; + } else if (MO.isReg()) { + DEBUG(dbgs() << "\n DefMI(" << i << ") = " + << *(MRI->getVRegDef(MO.getReg()))); + if (nonIndI) + return false; + + nonIndI = MRI->getVRegDef(MO.getReg()); + nonIndMO = &MO; + } + } + if (IndI && nonIndI && + nonIndI->getOpcode() == Hexagon::A2_addi && + nonIndI->getOperand(2).isImm() && + nonIndI->getOperand(2).getImm() == - RB.second) { + bool Order = orderBumpCompare(IndI, PredDef); + if (Order) { + IndMO->setReg(I->first); + nonIndMO->setReg(nonIndI->getOperand(1).getReg()); + return true; + } + } return false; + } + // It is not valid to do this transformation on an unsigned comparison + // because it may underflow. + Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), 0, 0, 0); + if (!Cmp || Comparison::isUnsigned(Cmp)) + return false; + + // If the register is being compared against an immediate, try changing + // the compare instruction to use induction register and adjust the + // immediate operand. int64_t CmpImm = getImmediate(*CmpImmOp); int64_t V = RB.second; - if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit). - return false; - if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit). + // Handle Overflow (64-bit). 
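// --- Illustrative sketch (editor's addition, not part of this commit).
// The guard just below rejects CmpImm + V when the sum would wrap: adding
// V > 0 overflows exactly when CmpImm > INT64_MAX - V, and adding V < 0
// underflows exactly when CmpImm < INT64_MIN - V. Both tests are themselves
// safe because they never actually form CmpImm + V. Standalone:
#include <cassert>
#include <cstdint>
static bool addWouldWrap(int64_t A, int64_t V) {
  return (V > 0 && A > INT64_MAX - V) || (V < 0 && A < INT64_MIN - V);
}
int main() {
  assert(addWouldWrap(INT64_MAX, 1));       // would overflow
  assert(!addWouldWrap(INT64_MAX - 1, 1));  // fits exactly
  assert(addWouldWrap(INT64_MIN, -1));      // would underflow
}
// --- End of sketch; the diff resumes below. ---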
+ if (((V > 0) && (CmpImm > INT64_MAX - V)) || + ((V < 0) && (CmpImm < INT64_MIN - V))) return false; CmpImm += V; - // Some forms of cmp-immediate allow u9 and s10. Assume the worst case - // scenario, i.e. an 8-bit value. - if (CmpImmOp->isImm() && !isInt<8>(CmpImm)) - return false; + // Most comparisons of register against an immediate value allow + // the immediate to be constant-extended. There are some exceptions + // though. Make sure the new combination will work. + if (CmpImmOp->isImm()) + if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm)) + return false; // Make sure that the compare happens after the bump. Otherwise, // after the fixup, the compare would use a yet-undefined register. @@ -1411,19 +1806,27 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { return false; } - /// \brief Create a preheader for a given loop. MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( MachineLoop *L) { if (MachineBasicBlock *TmpPH = L->getLoopPreheader()) return TmpPH; + if (!HWCreatePreheader) + return nullptr; + MachineBasicBlock *Header = L->getHeader(); MachineBasicBlock *Latch = L->getLoopLatch(); + MachineBasicBlock *ExitingBlock = getExitingBlock(L); MachineFunction *MF = Header->getParent(); DebugLoc DL; - if (!Latch || Header->hasAddressTaken()) +#ifndef NDEBUG + if ((PHFn != "") && (PHFn != MF->getName())) + return nullptr; +#endif + + if (!Latch || !ExitingBlock || Header->hasAddressTaken()) return nullptr; typedef MachineBasicBlock::instr_iterator instr_iterator; @@ -1435,16 +1838,14 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( SmallVector<MachineOperand,2> Tmp1; MachineBasicBlock *TB = nullptr, *FB = nullptr; - if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false)) + if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Tmp1, false)) return nullptr; for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { MachineBasicBlock *PB = *I; - if (PB != Latch) { - bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false); - if (NotAnalyzed) - return nullptr; - } + bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false); + if (NotAnalyzed) + return nullptr; } MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); @@ -1453,7 +1854,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( if (Header->pred_size() > 2) { // Ensure that the header has only two predecessors: the preheader and // the loop latch. Any additional predecessors of the header should - // join at the newly created preheader. Inspect all PHI nodes from the + // join at the newly created preheader. Inspect all PHI nodes from the // header and create appropriate corresponding PHI nodes in the preheader. for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); @@ -1473,11 +1874,14 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( // created PHI node in the preheader. 
for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { unsigned PredR = PN->getOperand(i).getReg(); + unsigned PredRSub = PN->getOperand(i).getSubReg(); MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); if (PredB == Latch) continue; - NewPN->addOperand(MachineOperand::CreateReg(PredR, false)); + MachineOperand MO = MachineOperand::CreateReg(PredR, false); + MO.setSubReg(PredRSub); + NewPN->addOperand(MO); NewPN->addOperand(MachineOperand::CreateMBB(PredB)); } @@ -1547,5 +1951,16 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL); NewPH->addSuccessor(Header); + MachineLoop *ParentLoop = L->getParentLoop(); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase()); + + // Update the dominator information with the new preheader. + if (MDT) { + MachineDomTreeNode *HDom = MDT->getNode(Header); + MDT->addNewBlock(NewPH, HDom->getIDom()->getBlock()); + MDT->changeImmediateDominator(Header, NewPH); + } + return NewPH; } diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index ea3a1770ac31..7a213aad072c 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -13,8 +13,11 @@ #include "Hexagon.h" #include "HexagonISelLowering.h" +#include "HexagonMachineFunctionInfo.h" #include "HexagonTargetMachine.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" @@ -45,51 +48,43 @@ namespace llvm { /// namespace { class HexagonDAGToDAGISel : public SelectionDAGISel { - /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can - /// make the right decision when generating code for different targets. - const HexagonSubtarget &Subtarget; - - // Keep a reference to HexagonTargetMachine. - const HexagonTargetMachine& TM; - DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap; + const HexagonTargetMachine& HTM; + const HexagonSubtarget *HST; public: - explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine, + explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(targetmachine, OptLevel), - Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()), - TM(targetmachine) { + : SelectionDAGISel(tm, OptLevel), HTM(tm) { initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry()); } - bool hasNumUsesBelowThresGA(SDNode *N) const; + + bool runOnMachineFunction(MachineFunction &MF) override { + // Reset the subtarget each time through. + HST = &MF.getSubtarget<HexagonSubtarget>(); + SelectionDAGISel::runOnMachineFunction(MF); + return true; + } + + virtual void PreprocessISelDAG() override; + virtual void EmitFunctionEntryCode() override; SDNode *Select(SDNode *N) override; // Complex Pattern Selectors. 
- inline bool foldGlobalAddress(SDValue &N, SDValue &R); - inline bool foldGlobalAddressGP(SDValue &N, SDValue &R); - bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP); - bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2); + inline bool SelectAddrGA(SDValue &N, SDValue &R); + inline bool SelectAddrGP(SDValue &N, SDValue &R); + bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP); + bool SelectAddrFI(SDValue &N, SDValue &R); const char *getPassName() const override { return "Hexagon DAG->DAG Pattern Instruction Selection"; } + SDNode *SelectFrameIndex(SDNode *N); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) override; - bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); - SDNode *SelectLoad(SDNode *N); SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl); SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl); @@ -101,88 +96,98 @@ public: SDNode *SelectIndexedStore(StoreSDNode *ST, SDLoc dl); SDNode *SelectStore(SDNode *N); SDNode *SelectSHL(SDNode *N); - SDNode *SelectSelect(SDNode *N); - SDNode *SelectTruncate(SDNode *N); SDNode *SelectMul(SDNode *N); SDNode *SelectZeroExtend(SDNode *N); - SDNode *SelectIntrinsicWOChain(SDNode *N); SDNode *SelectIntrinsicWChain(SDNode *N); + SDNode *SelectIntrinsicWOChain(SDNode *N); SDNode *SelectConstant(SDNode *N); SDNode *SelectConstantFP(SDNode *N); SDNode *SelectAdd(SDNode *N); - bool isConstExtProfitable(SDNode *N) const; - -// XformMskToBitPosU5Imm - Returns the bit position which -// the single bit 32 bit mask represents. -// Used in Clr and Set bit immediate memops. -SDValue XformMskToBitPosU5Imm(uint32_t Imm) { - int32_t bitPos; - bitPos = Log2_32(Imm); - assert(bitPos >= 0 && bitPos < 32 && - "Constant out of range for 32 BitPos Memops"); - return CurDAG->getTargetConstant(bitPos, MVT::i32); -} + SDNode *SelectBitOp(SDNode *N); + + // XformMskToBitPosU5Imm - Returns the bit position which + // the single bit 32 bit mask represents. + // Used in Clr and Set bit immediate memops. + SDValue XformMskToBitPosU5Imm(uint32_t Imm, SDLoc DL) { + int32_t bitPos; + bitPos = Log2_32(Imm); + assert(bitPos >= 0 && bitPos < 32 && + "Constant out of range for 32 BitPos Memops"); + return CurDAG->getTargetConstant(bitPos, DL, MVT::i32); + } -// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit -// mask represents. Used in Clr and Set bit immediate memops. -SDValue XformMskToBitPosU4Imm(uint16_t Imm) { - return XformMskToBitPosU5Imm(Imm); -} + // XformMskToBitPosU4Imm - Returns the bit position which the single-bit + // 16 bit mask represents. Used in Clr and Set bit immediate memops. 
+ SDValue XformMskToBitPosU4Imm(uint16_t Imm, SDLoc DL) { + return XformMskToBitPosU5Imm(Imm, DL); + } -// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit -// mask represents. Used in Clr and Set bit immediate memops. -SDValue XformMskToBitPosU3Imm(uint8_t Imm) { - return XformMskToBitPosU5Imm(Imm); -} + // XformMskToBitPosU3Imm - Returns the bit position which the single-bit + // 8 bit mask represents. Used in Clr and Set bit immediate memops. + SDValue XformMskToBitPosU3Imm(uint8_t Imm, SDLoc DL) { + return XformMskToBitPosU5Imm(Imm, DL); + } -// Return true if there is exactly one bit set in V, i.e., if V is one of the -// following integers: 2^0, 2^1, ..., 2^31. -bool ImmIsSingleBit(uint32_t v) const { - uint32_t c = CountPopulation_64(v); - // Only return true if we counted 1 bit. - return c == 1; -} + // Return true if there is exactly one bit set in V, i.e., if V is one of the + // following integers: 2^0, 2^1, ..., 2^31. + bool ImmIsSingleBit(uint32_t v) const { + return isPowerOf2_32(v); + } -// XformM5ToU5Imm - Return a target constant with the specified value, of type -// i32 where the negative literal is transformed into a positive literal for -// use in -= memops. -inline SDValue XformM5ToU5Imm(signed Imm) { - assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops"); - return CurDAG->getTargetConstant( - Imm, MVT::i32); -} + // XformM5ToU5Imm - Return a target constant with the specified value, of + // type i32 where the negative literal is transformed into a positive literal + // for use in -= memops. + inline SDValue XformM5ToU5Imm(signed Imm, SDLoc DL) { + assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops"); + return CurDAG->getTargetConstant( - Imm, DL, MVT::i32); + } + // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range + // [1..128], used in cmpb.gtu instructions. + inline SDValue XformU7ToU7M1Imm(signed Imm, SDLoc DL) { + assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op"); + return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i8); + } -// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range -// [1..128], used in cmpb.gtu instructions. -inline SDValue XformU7ToU7M1Imm(signed Imm) { - assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op"); - return CurDAG->getTargetConstant(Imm - 1, MVT::i8); -} + // XformS8ToS8M1Imm - Return a target constant decremented by 1. + inline SDValue XformSToSM1Imm(signed Imm, SDLoc DL) { + return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32); + } -// XformS8ToS8M1Imm - Return a target constant decremented by 1. -inline SDValue XformSToSM1Imm(signed Imm) { - return CurDAG->getTargetConstant(Imm - 1, MVT::i32); -} + // XformU8ToU8M1Imm - Return a target constant decremented by 1. + inline SDValue XformUToUM1Imm(unsigned Imm, SDLoc DL) { + assert((Imm >= 1) && "Cannot decrement unsigned int less than 1"); + return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32); + } -// XformU8ToU8M1Imm - Return a target constant decremented by 1. -inline SDValue XformUToUM1Imm(unsigned Imm) { - assert((Imm >= 1) && "Cannot decrement unsigned int less than 1"); - return CurDAG->getTargetConstant(Imm - 1, MVT::i32); -} + // XformSToSM2Imm - Return a target constant decremented by 2. + inline SDValue XformSToSM2Imm(unsigned Imm, SDLoc DL) { + return CurDAG->getTargetConstant(Imm - 2, DL, MVT::i32); + } + + // XformSToSM3Imm - Return a target constant decremented by 3. 
+  inline SDValue XformSToSM3Imm(unsigned Imm, SDLoc DL) {
+    return CurDAG->getTargetConstant(Imm - 3, DL, MVT::i32);
+  }
-// Include the pieces autogenerated from the target description.
-#include "HexagonGenDAGISel.inc"
-};
+  // Include the pieces autogenerated from the target description.
+  #include "HexagonGenDAGISel.inc"
+
+private:
+  bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
+};  // end HexagonDAGToDAGISel
 }  // end anonymous namespace
 /// createHexagonISelDag - This pass converts a legalized DAG into a
 /// Hexagon-specific DAG, ready for instruction scheduling.
 ///
-FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
-                                         CodeGenOpt::Level OptLevel) {
+namespace llvm {
+FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+                                   CodeGenOpt::Level OptLevel) {
   return new HexagonDAGToDAGISel(TM, OptLevel);
 }
+}
 static void initializePassOnce(PassRegistry &Registry) {
   const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
@@ -196,76 +201,6 @@ void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
 }
-
-static bool IsS11_0_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // immS16 predicate - True if the immediate fits in a 16-bit sign extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isInt<11>(v);
-}
-
-static bool IsS11_1_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // immS16 predicate - True if the immediate fits in a 16-bit sign extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedInt<11,1>(v);
-}
-
-static bool IsS11_2_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // immS16 predicate - True if the immediate fits in a 16-bit sign extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedInt<11,2>(v);
-}
-
-static bool IsS11_3_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // immS16 predicate - True if the immediate fits in a 16-bit sign extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedInt<11,3>(v);
-}
-
-static bool IsU6_0_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isUInt<6>(v);
-}
-
-static bool IsU6_1_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedUInt<6,1>(v);
-}
-
-static bool IsU6_2_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedUInt<6,2>(v);
-}
-
-
 // Intrinsics that return a predicate.
 static unsigned doesIntrinsicReturnPredicate(unsigned ID)
 {
@@ -312,268 +247,119 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID)
   }
 }
-
-// Intrinsics that have predicate operands.
-static unsigned doesIntrinsicContainPredicate(unsigned ID) -{ - switch (ID) { - default: - return 0; - case Intrinsic::hexagon_C2_tfrpr: - return Hexagon::C2_tfrpr; - case Intrinsic::hexagon_C2_and: - return Hexagon::C2_and; - case Intrinsic::hexagon_C2_xor: - return Hexagon::C2_xor; - case Intrinsic::hexagon_C2_or: - return Hexagon::C2_or; - case Intrinsic::hexagon_C2_not: - return Hexagon::C2_not; - case Intrinsic::hexagon_C2_any8: - return Hexagon::C2_any8; - case Intrinsic::hexagon_C2_all8: - return Hexagon::C2_all8; - case Intrinsic::hexagon_C2_vitpack: - return Hexagon::C2_vitpack; - case Intrinsic::hexagon_C2_mask: - return Hexagon::C2_mask; - case Intrinsic::hexagon_C2_mux: - return Hexagon::C2_mux; - - // Mapping hexagon_C2_muxir to MUX_pri. This is pretty weird - but - // that's how it's mapped in q6protos.h. - case Intrinsic::hexagon_C2_muxir: - return Hexagon::C2_muxri; - - // Mapping hexagon_C2_muxri to MUX_pir. This is pretty weird - but - // that's how it's mapped in q6protos.h. - case Intrinsic::hexagon_C2_muxri: - return Hexagon::C2_muxir; - - case Intrinsic::hexagon_C2_muxii: - return Hexagon::C2_muxii; - case Intrinsic::hexagon_C2_vmux: - return Hexagon::VMUX_prr64; - case Intrinsic::hexagon_S2_valignrb: - return Hexagon::VALIGN_rrp; - case Intrinsic::hexagon_S2_vsplicerb: - return Hexagon::VSPLICE_rrp; - } -} - - -static bool OffsetFitsS11(EVT MemType, int64_t Offset) { - if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) { - return true; - } - if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) { - return true; - } - if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) { - return true; - } - if (MemType == MVT::i8 && isInt<11>(Offset)) { - return true; - } - return false; -} - - -// -// Try to lower loads of GlobalAdresses into base+offset loads. Custom -// lowering for GlobalAddress nodes has already turned it into a -// CONST32. -// -SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl) { - SDValue Chain = LD->getChain(); - SDNode* Const32 = LD->getBasePtr().getNode(); - unsigned Opcode = 0; - - if (Const32->getOpcode() == HexagonISD::CONST32 && - ISD::isNormalLoad(LD)) { - SDValue Base = Const32->getOperand(0); - EVT LoadedVT = LD->getMemoryVT(); - int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset(); - if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) { - MVT PointerTy = getTargetLowering()->getPointerTy(); - const GlobalValue* GV = - cast<GlobalAddressSDNode>(Base)->getGlobal(); - SDValue TargAddr = - CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0); - SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set, - dl, PointerTy, - TargAddr); - // Figure out base + offset opcode - if (LoadedVT == MVT::i64) Opcode = Hexagon::L2_loadrd_io; - else if (LoadedVT == MVT::i32) Opcode = Hexagon::L2_loadri_io; - else if (LoadedVT == MVT::i16) Opcode = Hexagon::L2_loadrh_io; - else if (LoadedVT == MVT::i8) Opcode = Hexagon::L2_loadrb_io; - else llvm_unreachable("unknown memory type"); - - // Build indexed load. 
- SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy); - SDNode* Result = CurDAG->getMachineNode(Opcode, dl, - LD->getValueType(0), - MVT::Other, - SDValue(NewBase,0), - TargetConstOff, - Chain); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); - ReplaceUses(LD, Result); - return Result; - } - } - - return SelectCode(LD); -} - - SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode, - SDLoc dl) -{ + SDLoc dl) { SDValue Chain = LD->getChain(); EVT LoadedVT = LD->getMemoryVT(); SDValue Base = LD->getBasePtr(); SDValue Offset = LD->getOffset(); SDNode *OffsetNode = Offset.getNode(); int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); - SDValue N1 = LD->getOperand(1); - SDValue CPTmpN1_0; - SDValue CPTmpN1_1; - - if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && - N1.getNode()->getValueType(0) == MVT::i32) { - const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); - if (TII->isValidAutoIncImm(LoadedVT, Val)) { - SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, - MVT::Other, Base, TargetConst, - Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, - SDValue(Result_1, 0)); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result_2, 0), - SDValue(Result_1, 1), - SDValue(Result_1, 2) - }; - ReplaceUses(Froms, Tos, 3); - return Result_2; - } - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::Other, Base, TargetConst0, + + const HexagonInstrInfo &TII = *HST->getInstrInfo(); + if (TII.isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConst = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, + MVT::Other, Base, TargetConst, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, - MVT::i64, SDValue(Result_1, 0)); - SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, - MVT::i32, Base, TargetConstVal, - SDValue(Result_1, 1)); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, + SDValue(Result_1, 0)); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = LD->getMemOperand(); cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); const SDValue Froms[] = { SDValue(LD, 0), SDValue(LD, 1), - SDValue(LD, 2) - }; + SDValue(LD, 2) }; const SDValue Tos[] = { SDValue(Result_2, 0), - SDValue(Result_3, 0), - SDValue(Result_1, 1) - }; + SDValue(Result_1, 1), + SDValue(Result_1, 2) }; ReplaceUses(Froms, Tos, 3); return Result_2; } - return SelectCode(LD); + + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, + Base, TargetConst0, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, + SDValue(Result_1, 0)); + SDNode* Result_3 = 
CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_3, 0), + SDValue(Result_1, 1) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; } SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode, - SDLoc dl) -{ + SDLoc dl) { SDValue Chain = LD->getChain(); EVT LoadedVT = LD->getMemoryVT(); SDValue Base = LD->getBasePtr(); SDValue Offset = LD->getOffset(); SDNode *OffsetNode = Offset.getNode(); int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); - SDValue N1 = LD->getOperand(1); - SDValue CPTmpN1_0; - SDValue CPTmpN1_1; - - if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && - N1.getNode()->getValueType(0) == MVT::i32) { - const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); - if (TII->isValidAutoIncImm(LoadedVT, Val)) { - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::i32, MVT::Other, Base, - TargetConstVal, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, - TargetConst0); - SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl, - MVT::i64, MVT::Other, - SDValue(Result_2,0), - SDValue(Result_1,0)); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result_3, 0), - SDValue(Result_1, 1), - SDValue(Result_1, 2) - }; - ReplaceUses(Froms, Tos, 3); - return Result_3; - } - // Generate an indirect load. - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + const HexagonInstrInfo &TII = *HST->getInstrInfo(); + if (TII.isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::Other, - Base, TargetConst0, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, - TargetConst0); - SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl, + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64, MVT::Other, - SDValue(Result_2,0), + TargetConst0, SDValue(Result_1,0)); - // Add offset to base. - SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, - Base, TargetConstVal, - SDValue(Result_1, 1)); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = LD->getMemOperand(); cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); const SDValue Froms[] = { SDValue(LD, 0), SDValue(LD, 1), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result_3, 0), // Load value. - SDValue(Result_4, 0), // New address. 
- SDValue(Result_1, 1) - }; + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) }; ReplaceUses(Froms, Tos, 3); - return Result_3; + return Result_2; } - return SelectCode(LD); + // Generate an indirect load. + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, Base, TargetConst0, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl, + MVT::i64, MVT::Other, + TargetConst0, + SDValue(Result_1,0)); + // Add offset to base. + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), // Load value. + SDValue(Result_3, 0), // New address. + SDValue(Result_1, 1) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; } @@ -587,47 +373,45 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { EVT LoadedVT = LD->getMemoryVT(); unsigned Opcode = 0; - // Check for zero ext loads. - bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD); + // Check for zero extended loads. Treat any-extend loads as zero extended + // loads. + ISD::LoadExtType ExtType = LD->getExtensionType(); + bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD); // Figure out the opcode. - const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); + const HexagonInstrInfo &TII = *HST->getInstrInfo(); if (LoadedVT == MVT::i64) { - if (TII->isValidAutoIncImm(LoadedVT, Val)) + if (TII.isValidAutoIncImm(LoadedVT, Val)) Opcode = Hexagon::L2_loadrd_pi; else Opcode = Hexagon::L2_loadrd_io; } else if (LoadedVT == MVT::i32) { - if (TII->isValidAutoIncImm(LoadedVT, Val)) + if (TII.isValidAutoIncImm(LoadedVT, Val)) Opcode = Hexagon::L2_loadri_pi; else Opcode = Hexagon::L2_loadri_io; } else if (LoadedVT == MVT::i16) { - if (TII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = zextval ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi; + if (TII.isValidAutoIncImm(LoadedVT, Val)) + Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi; else - Opcode = zextval ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io; + Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io; } else if (LoadedVT == MVT::i8) { - if (TII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = zextval ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi; + if (TII.isValidAutoIncImm(LoadedVT, Val)) + Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi; else - Opcode = zextval ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io; + Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io; } else llvm_unreachable("unknown memory type"); - // For zero ext i64 loads, we need to add combine instructions. - if (LD->getValueType(0) == MVT::i64 && - LD->getExtensionType() == ISD::ZEXTLOAD) { + // For zero extended i64 loads, we need to add combine instructions. 
+ if (LD->getValueType(0) == MVT::i64 && IsZeroExt) return SelectIndexedLoadZeroExtend64(LD, Opcode, dl); - } - if (LD->getValueType(0) == MVT::i64 && - LD->getExtensionType() == ISD::SEXTLOAD) { - // Handle sign ext i64 loads. + // Handle sign extended i64 loads. + if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD) return SelectIndexedLoadSignExtend64(LD, Opcode, dl); - } - if (TII->isValidAutoIncImm(LoadedVT, Val)) { - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + + if (TII.isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); SDNode* Result = CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), MVT::i32, MVT::Other, Base, @@ -646,13 +430,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { ReplaceUses(Froms, Tos, 3); return Result; } else { - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), MVT::Other, Base, TargetConst0, Chain); - SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, + SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, Base, TargetConstVal, SDValue(Result_1, 1)); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); @@ -682,7 +466,7 @@ SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) { if (AM != ISD::UNINDEXED) { result = SelectIndexedLoad(LD, dl); } else { - result = SelectBaseOffsetLoad(LD, dl); + result = SelectCode(LD); } return result; @@ -698,14 +482,12 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { // Get the constant value. int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); EVT StoredVT = ST->getMemoryVT(); + EVT ValueVT = Value.getValueType(); // Offset value must be within representable range // and must have correct alignment properties. - const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); - if (TII->isValidAutoIncImm(StoredVT, Val)) { - SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value, - Chain}; + const HexagonInstrInfo &TII = *HST->getInstrInfo(); + if (TII.isValidAutoIncImm(StoredVT, Val)) { unsigned Opcode = 0; // Figure out the post inc version of opcode. @@ -715,6 +497,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi; else llvm_unreachable("unknown memory type"); + if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) { + assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store"); + Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, + dl, MVT::i32, Value); + } + SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, dl, MVT::i32), Value, + Chain}; // Build post increment store. SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, Ops); @@ -728,9 +517,10 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { } // Note: Order of operands matches the def of instruction: - // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ... + // def S2_storerd_io + // : STInst<(outs), (ins IntRegs:$base, imm:$offset, DoubleRegs:$src1), ... // and it differs for POST_ST* for instance. 
- SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value, + SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, dl, MVT::i32), Value, Chain}; unsigned Opcode = 0; @@ -742,10 +532,10 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { else llvm_unreachable("unknown memory type"); // Build regular store. - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); // Build splitted incriment instruction. - SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, + SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, Base, TargetConstVal, SDValue(Result_1, 0)); @@ -758,61 +548,6 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { return Result_2; } - -SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST, - SDLoc dl) { - SDValue Chain = ST->getChain(); - SDNode* Const32 = ST->getBasePtr().getNode(); - SDValue Value = ST->getValue(); - unsigned Opcode = 0; - - // Try to lower stores of GlobalAdresses into indexed stores. Custom - // lowering for GlobalAddress nodes has already turned it into a - // CONST32. Avoid truncating stores for the moment. Post-inc stores - // do the same. Don't think there's a reason for it, so will file a - // bug to fix. - if ((Const32->getOpcode() == HexagonISD::CONST32) && - !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) { - SDValue Base = Const32->getOperand(0); - if (Base.getOpcode() == ISD::TargetGlobalAddress) { - EVT StoredVT = ST->getMemoryVT(); - int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset(); - if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) { - MVT PointerTy = getTargetLowering()->getPointerTy(); - const GlobalValue* GV = - cast<GlobalAddressSDNode>(Base)->getGlobal(); - SDValue TargAddr = - CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0); - SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set, - dl, PointerTy, - TargAddr); - - // Figure out base + offset opcode - if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io; - else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io; - else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io; - else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io; - else llvm_unreachable("unknown memory type"); - - SDValue Ops[] = {SDValue(NewBase,0), - CurDAG->getTargetConstant(Offset,PointerTy), - Value, Chain}; - // build indexed store - SDNode* Result = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = ST->getMemOperand(); - cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); - ReplaceUses(ST, Result); - return Result; - } - } - } - - return SelectCode(ST); -} - - SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) { SDLoc dl(N); StoreSDNode *ST = cast<StoreSDNode>(N); @@ -823,7 +558,7 @@ SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) { return SelectIndexedStore(ST, dl); } - return SelectBaseOffsetStore(ST, dl); + return SelectCode(ST); } SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { @@ -864,7 +599,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { } SDValue Chain = LD->getChain(); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, 
MVT::i32, MVT::Other, LD->getBasePtr(), TargetConst0, @@ -890,7 +625,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { } SDValue Chain = LD->getChain(); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, MVT::Other, LD->getBasePtr(), TargetConst0, @@ -909,187 +644,6 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { return SelectCode(N); } - -SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) { - SDLoc dl(N); - SDValue N0 = N->getOperand(0); - if (N0.getOpcode() == ISD::SETCC) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) { - SDValue N000 = N00.getOperand(0); - SDValue N001 = N00.getOperand(1); - if (cast<VTSDNode>(N001)->getVT() == MVT::i16) { - SDValue N01 = N0.getOperand(1); - SDValue N02 = N0.getOperand(2); - - // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2, - // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1, - // IntRegs:i32:$src2) - // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2) - // Pattern complexity = 9 cost = 1 size = 0. - if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) { - SDValue N1 = N->getOperand(1); - if (N01 == N1) { - SDValue N2 = N->getOperand(2); - if (N000 == N2 && - N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && - N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { - SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl, - MVT::i32, N000); - SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_max, dl, - MVT::i32, - SDValue(SextNode, 0), - N1); - ReplaceUses(N, Result); - return Result; - } - } - } - - // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2, - // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1, - // IntRegs:i32:$src2) - // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2) - // Pattern complexity = 9 cost = 1 size = 0. - if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) { - SDValue N1 = N->getOperand(1); - if (N01 == N1) { - SDValue N2 = N->getOperand(2); - if (N000 == N2 && - N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && - N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { - SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl, - MVT::i32, N000); - SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_min, dl, - MVT::i32, - SDValue(SextNode, 0), - N1); - ReplaceUses(N, Result); - return Result; - } - } - } - } - } - } - - return SelectCode(N); -} - - -SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) { - SDLoc dl(N); - SDValue Shift = N->getOperand(0); - - // - // %conv.i = sext i32 %tmp1 to i64 - // %conv2.i = sext i32 %add to i64 - // %mul.i = mul nsw i64 %conv2.i, %conv.i - // %shr5.i = lshr i64 %mul.i, 32 - // %conv3.i = trunc i64 %shr5.i to i32 - // - // --- match with the following --- - // - // %conv3.i = mpy (%tmp1, %add) - // - // Trunc to i32. - if (N->getValueType(0) == MVT::i32) { - // Trunc from i64. - if (Shift.getNode()->getValueType(0) == MVT::i64) { - // Trunc child is logical shift right. 
- if (Shift.getOpcode() != ISD::SRL) { - return SelectCode(N); - } - - SDValue ShiftOp0 = Shift.getOperand(0); - SDValue ShiftOp1 = Shift.getOperand(1); - - // Shift by const 32 - if (ShiftOp1.getOpcode() != ISD::Constant) { - return SelectCode(N); - } - - int32_t ShiftConst = - cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue(); - if (ShiftConst != 32) { - return SelectCode(N); - } - - // Shifting a i64 signed multiply - SDValue Mul = ShiftOp0; - if (Mul.getOpcode() != ISD::MUL) { - return SelectCode(N); - } - - SDValue MulOp0 = Mul.getOperand(0); - SDValue MulOp1 = Mul.getOperand(1); - - SDValue OP0; - SDValue OP1; - - // Handle sign_extend and sextload - if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { - SDValue Sext0 = MulOp0.getOperand(0); - if (Sext0.getNode()->getValueType(0) != MVT::i32) { - return SelectCode(N); - } - - OP0 = Sext0; - } else if (MulOp0.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode()); - if (LD->getMemoryVT() != MVT::i32 || - LD->getExtensionType() != ISD::SEXTLOAD || - LD->getAddressingMode() != ISD::UNINDEXED) { - return SelectCode(N); - } - - SDValue Chain = LD->getChain(); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, - MVT::Other, - LD->getBasePtr(), - TargetConst0, Chain), 0); - } else { - return SelectCode(N); - } - - // Same goes for the second operand. - if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) { - SDValue Sext1 = MulOp1.getOperand(0); - if (Sext1.getNode()->getValueType(0) != MVT::i32) - return SelectCode(N); - - OP1 = Sext1; - } else if (MulOp1.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode()); - if (LD->getMemoryVT() != MVT::i32 || - LD->getExtensionType() != ISD::SEXTLOAD || - LD->getAddressingMode() != ISD::UNINDEXED) { - return SelectCode(N); - } - - SDValue Chain = LD->getChain(); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, - MVT::Other, - LD->getBasePtr(), - TargetConst0, Chain), 0); - } else { - return SelectCode(N); - } - - // Generate a mpy instruction. 
- SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpy_up, dl, MVT::i32,
- OP0, OP1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
-
- return SelectCode(N);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
SDLoc dl(N);
if (N->getValueType(0) == MVT::i32) {
@@ -1107,7 +661,7 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
int32_t MulConst =
cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
int32_t ValConst = MulConst << ShlConst;
- SDValue Val = CurDAG->getTargetConstant(ValConst,
+ SDValue Val = CurDAG->getTargetConstant(ValConst, dl,
MVT::i32);
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
if (isInt<9>(CN->getSExtValue())) {
@@ -1135,7 +689,8 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
int32_t Shl2Const =
cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
int32_t ValConst = 1 << (ShlConst+Shl2Const);
- SDValue Val = CurDAG->getTargetConstant(-ValConst, MVT::i32);
+ SDValue Val = CurDAG->getTargetConstant(-ValConst, dl,
+ MVT::i32);
if (ConstantSDNode *CN =
dyn_cast<ConstantSDNode>(Val.getNode()))
if (isInt<9>(CN->getSExtValue())) {
@@ -1168,6 +723,37 @@
//
SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
SDLoc dl(N);
+
+ SDValue Op0 = N->getOperand(0);
+ EVT OpVT = Op0.getValueType();
+ unsigned OpBW = OpVT.getSizeInBits();
+
+ // Special handling for zero-extending a vector of booleans.
+ if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) {
+ SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0);
+ unsigned NE = OpVT.getVectorNumElements();
+ EVT ExVT = N->getValueType(0);
+ unsigned ES = ExVT.getVectorElementType().getSizeInBits();
+ uint64_t MV = 0, Bit = 1;
+ for (unsigned i = 0; i < NE; ++i) {
+ MV |= Bit;
+ Bit <<= ES;
+ }
+ SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64);
+ SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64_Int_Real, dl,
+ MVT::i64, Ones);
+ if (ExVT.getSizeInBits() == 32) {
+ SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64,
+ SDValue(Mask,0), SDValue(OnesReg,0));
+ SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl,
+ MVT::i32);
+ return CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT,
+ SDValue(And,0), SubR);
+ }
+ return CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT,
+ SDValue(Mask,0), SDValue(OnesReg,0));
+ }
+
SDNode *IsIntrinsic = N->getOperand(0).getNode();
if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
unsigned ID =
@@ -1175,8 +761,8 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
if (doesIntrinsicReturnPredicate(ID)) {
// Now we need to differentiate target data types.
if (N->getValueType(0) == MVT::i64) {
- // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs).
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs).
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
MVT::i32,
SDValue(IsIntrinsic, 0));
@@ -1204,56 +790,227 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
return SelectCode(N);
}
+//
+// Checking for circular load/store and bit-reverse load/store
+// intrinsics in order to select the correct lowered operation.
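The SelectIntrinsicWChain hunk below selects the circular-addressing load intrinsics into L2_loadr*_pci pseudo instructions. As a rough scalar model of what a circular post-increment load does (an illustration under assumed wrap semantics; circ_load_i32 and its parameters are made-up names, not the intrinsic's real signature):

    #include <cstddef>
    #include <cstdint>

    // Load one element through a cursor into a circular buffer of
    // len_words elements, then post-increment the cursor, wrapping
    // around at the end of the buffer.
    static int32_t circ_load_i32(const int32_t *base, size_t len_words,
                                 size_t &cursor) {
      int32_t v = base[cursor];
      cursor = (cursor + 1) % len_words;
      return v;
    }

This two-result shape, loaded value plus updated address, is why ResTys in the hunk carries two value types plus the chain, and why selection emits a follow-up store that writes the loaded value back through the intrinsic's pointer operand.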
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::hexagon_circ_ldd ||
+ IntNo == Intrinsic::hexagon_circ_ldw ||
+ IntNo == Intrinsic::hexagon_circ_lduh ||
+ IntNo == Intrinsic::hexagon_circ_ldh ||
+ IntNo == Intrinsic::hexagon_circ_ldub ||
+ IntNo == Intrinsic::hexagon_circ_ldb) {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Base = N->getOperand(2);
+ SDValue Load = N->getOperand(3);
+ SDValue ModifierExpr = N->getOperand(4);
+ SDValue Offset = N->getOperand(5);
+
+ // We need to add the return type for the load. This intrinsic has
+ // two return types, one for the load and one for the post-increment.
+ // Only the *_ld instructions push the extra return type, and bump the
+ // result node operand number correspondingly.
+ std::vector<EVT> ResTys;
+ unsigned opc;
+ unsigned memsize, align;
+ MVT MvtSize = MVT::i32;
+
+ if (IntNo == Intrinsic::hexagon_circ_ldd) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i64);
+ opc = Hexagon::L2_loadrd_pci_pseudo;
+ memsize = 8;
+ align = 8;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldw) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadri_pci_pseudo;
+ memsize = 4;
+ align = 4;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrh_pci_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_circ_lduh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadruh_pci_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrb_pci_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldub) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrub_pci_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else
+ llvm_unreachable("no opc");
+
+ ResTys.push_back(MVT::Other);
+
+ // Copy over the arguments, which are mostly the same.
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(Base);
+ Ops.push_back(Load);
+ Ops.push_back(ModifierExpr);
+ int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+ Ops.push_back(CurDAG->getTargetConstant(Val, dl, MVT::i32));
+ Ops.push_back(Chain);
+ SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
+
+ SDValue ST;
+ MachineMemOperand *Mem =
+ MF->getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, memsize, align);
+ if (MvtSize != MVT::i32)
+ ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
+ MvtSize, Mem);
+ else
+ ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
+
+ SDNode* Store = SelectStore(ST.getNode());
+
+ const SDValue Froms[] = { SDValue(N, 0),
+ SDValue(N, 1) };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Store, 0) };
+ ReplaceUses(Froms, Tos, 2);
+ return Result;
+ }
+
+ if (IntNo == Intrinsic::hexagon_brev_ldd ||
+ IntNo == Intrinsic::hexagon_brev_ldw ||
+ IntNo == Intrinsic::hexagon_brev_ldh ||
+ IntNo == Intrinsic::hexagon_brev_lduh ||
+ IntNo == Intrinsic::hexagon_brev_ldb ||
+ IntNo == Intrinsic::hexagon_brev_ldub) {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Base = N->getOperand(2);
+ SDValue Load = N->getOperand(3);
+ SDValue ModifierExpr = N->getOperand(4);
+
+ // We need to add the return type for the load. This intrinsic has
+ // two return types, one for the load and one for the post-increment.
+ std::vector<EVT> ResTys;
+ unsigned opc;
+ unsigned memsize, align;
+ MVT MvtSize = MVT::i32;
+
+ if (IntNo == Intrinsic::hexagon_brev_ldd) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i64);
+ opc = Hexagon::L2_loadrd_pbr_pseudo;
+ memsize = 8;
+ align = 8;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldw) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadri_pbr_pseudo;
+ memsize = 4;
+ align = 4;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrh_pbr_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_brev_lduh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadruh_pbr_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrb_pbr_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldub) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrub_pbr_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else
+ llvm_unreachable("no opc");
+
+ ResTys.push_back(MVT::Other);
+
+ // Copy over the arguments, which are mostly the same.
+ SmallVector<SDValue, 4> Ops; + Ops.push_back(Base); + Ops.push_back(Load); + Ops.push_back(ModifierExpr); + Ops.push_back(Chain); + SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); + SDValue ST; + MachineMemOperand *Mem = + MF->getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, memsize, align); + if (MvtSize != MVT::i32) + ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, + MvtSize, Mem); + else + ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); + + SDNode* Store = SelectStore(ST.getNode()); + + const SDValue Froms[] = { SDValue(N, 0), + SDValue(N, 1) }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Store, 0) }; + ReplaceUses(Froms, Tos, 2); + return Result; + } + + return SelectCode(N); +} // // Checking for intrinsics which have predicate registers as operand(s) // and lowering to the actual intrinsic. // SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { - SDLoc dl(N); - unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - unsigned IntrinsicWithPred = doesIntrinsicContainPredicate(ID); - - // We are concerned with only those intrinsics that have predicate registers - // as at least one of the operands. - if (IntrinsicWithPred) { - SmallVector<SDValue, 8> Ops; - const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); - const MCInstrDesc &MCID = TII->get(IntrinsicWithPred); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); - - // Iterate over all the operands of the intrinsics. - // For PredRegs, do the transfer. - // For Double/Int Regs, just preserve the value - // For immediates, lower it. - for (unsigned i = 1; i < N->getNumOperands(); ++i) { - SDNode *Arg = N->getOperand(i).getNode(); - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI, *MF); - - if (RC == &Hexagon::IntRegsRegClass || - RC == &Hexagon::DoubleRegsRegClass) { - Ops.push_back(SDValue(Arg, 0)); - } else if (RC == &Hexagon::PredRegsRegClass) { - // Do the transfer. - SDNode *PdRs = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1, - SDValue(Arg, 0)); - Ops.push_back(SDValue(PdRs,0)); - } else if (!RC && (dyn_cast<ConstantSDNode>(Arg) != nullptr)) { - // This is immediate operand. Lower it here making sure that we DO have - // const SDNode for immediate value. 
- int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue();
- SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32);
- Ops.push_back(SDVal);
- } else {
- llvm_unreachable("Unimplemented");
- }
- }
- EVT ReturnValueVT = N->getValueType(0);
- SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl,
- ReturnValueVT, Ops);
- ReplaceUses(N, Result);
- return Result;
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned Bits;
+ switch (IID) {
+ case Intrinsic::hexagon_S2_vsplatrb:
+ Bits = 8;
+ break;
+ case Intrinsic::hexagon_S2_vsplatrh:
+ Bits = 16;
+ break;
+ default:
+ return SelectCode(N);
+ }
+
+ SDValue const &V = N->getOperand(1);
+ SDValue U;
+ if (isValueExtension(V, Bits, U)) {
+ SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), U);
+ return SelectCode(R.getNode());
}
return SelectCode(N);
}
@@ -1267,47 +1024,30 @@ SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
APFloat APF = CN->getValueAPF();
if (N->getValueType(0) == MVT::f32) {
return CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32,
- CurDAG->getTargetConstantFP(APF.convertToFloat(), MVT::f32));
+ CurDAG->getTargetConstantFP(APF.convertToFloat(), dl, MVT::f32));
}
else if (N->getValueType(0) == MVT::f64) {
return CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64,
- CurDAG->getTargetConstantFP(APF.convertToDouble(), MVT::f64));
+ CurDAG->getTargetConstantFP(APF.convertToDouble(), dl, MVT::f64));
}
return SelectCode(N);
}
-
//
// Map predicate true (encoded as -1 in LLVM) to a XOR.
//
SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
SDLoc dl(N);
if (N->getValueType(0) == MVT::i1) {
- SDNode* Result;
+ SDNode* Result = 0;
int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
if (Val == -1) {
- // Create the IntReg = 1 node.
- SDNode* IntRegTFR =
- CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- CurDAG->getTargetConstant(0, MVT::i32));
-
- // Pd = IntReg
- SDNode* Pd = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
- SDValue(IntRegTFR, 0));
-
- // not(Pd)
- SDNode* NotPd = CurDAG->getMachineNode(Hexagon::C2_not, dl, MVT::i1,
- SDValue(Pd, 0));
-
- // xor(not(Pd))
- Result = CurDAG->getMachineNode(Hexagon::C2_xor, dl, MVT::i1,
- SDValue(Pd, 0), SDValue(NotPd, 0));
-
- // We have just built:
- // Rs = Pd
- // Pd = xor(not(Pd), Pd)
-
+ Result = CurDAG->getMachineNode(Hexagon::TFR_PdTrue, dl, MVT::i1);
+ } else if (Val == 0) {
+ Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1);
+ }
+ if (Result) {
ReplaceUses(N, Result);
return Result;
}
@@ -1343,6 +1083,175 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
return Result;
}
+//
+// Map the following, where possible.
+// AND/FABS -> clrbit
+// OR -> setbit
+// XOR/FNEG -> toggle_bit.
+//
+SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
+ SDLoc dl(N);
+ EVT ValueVT = N->getValueType(0);
+
+ // We handle only 32 and 64-bit bit ops.
+ if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 ||
+ ValueVT == MVT::f32 || ValueVT == MVT::f64))
+ return SelectCode(N);
+
+ // We handle only fabs and fneg for V5.
+ unsigned Opc = N->getOpcode();
+ if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps())
+ return SelectCode(N);
+
+ int64_t Val = 0;
+ if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+ if (N->getOperand(1).getOpcode() == ISD::Constant)
+ Val = cast<ConstantSDNode>((N)->getOperand(1))->getSExtValue();
+ else
+ return SelectCode(N);
+ }
+
+ if (Opc == ISD::AND) {
+ if (((ValueVT == MVT::i32) &&
+ (!((Val & 0x80000000) || (Val & 0x7fffffff)))) ||
+ ((ValueVT == MVT::i64) &&
+ (!((Val & 0x8000000000000000) || (Val & 0x7fffffff)))))
+ // If it's simple AND, do the normal op.
+ return SelectCode(N);
+ else
+ Val = ~Val;
+ }
+
+ // If OR or AND is being fed by shl, srl or sra, don't do this change,
+ // because Hexagon provides |= and &= on shl, srl, and sra.
+ // Traverse the DAG to see if there is an shl, srl or sra.
+ if (Opc == ISD::OR || Opc == ISD::AND) {
+ switch (N->getOperand(0)->getOpcode()) {
+ default: break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL:
+ return SelectCode(N);
+ }
+ }
+
+ // Make sure it's a power of 2.
+ unsigned bitpos = 0;
+ if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+ if (((ValueVT == MVT::i32) && !isPowerOf2_32(Val)) ||
+ ((ValueVT == MVT::i64) && !isPowerOf2_64(Val)))
+ return SelectCode(N);
+
+ // Get the bit position.
+ bitpos = countTrailingZeros(uint64_t(Val));
+ } else {
+ // For fabs and fneg, it's always the 31st bit.
+ bitpos = 31;
+ }
+
+ unsigned BitOpc = 0;
+ // Set the right opcode for bitwise operations.
+ switch(Opc) {
+ default: llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
+ case ISD::AND:
+ case ISD::FABS:
+ BitOpc = Hexagon::S2_clrbit_i;
+ break;
+ case ISD::OR:
+ BitOpc = Hexagon::S2_setbit_i;
+ break;
+ case ISD::XOR:
+ case ISD::FNEG:
+ BitOpc = Hexagon::S2_togglebit_i;
+ break;
+ }
+
+ SDNode *Result;
+ // Get the right SDVal for the opcode.
+ SDValue SDVal = CurDAG->getTargetConstant(bitpos, dl, MVT::i32);
+
+ if (ValueVT == MVT::i32 || ValueVT == MVT::f32) {
+ Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT,
+ N->getOperand(0), SDVal);
+ } else {
+ // 64-bit gymnastics to use REG_SEQUENCE. But it's worth it.
+ EVT SubValueVT;
+ if (ValueVT == MVT::i64)
+ SubValueVT = MVT::i32;
+ else
+ SubValueVT = MVT::f32;
+
+ SDNode *Reg = N->getOperand(0).getNode();
+ SDValue RegClass = CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID,
+ dl, MVT::i64);
+
+ SDValue SubregHiIdx = CurDAG->getTargetConstant(Hexagon::subreg_hireg, dl,
+ MVT::i32);
+ SDValue SubregLoIdx = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl,
+ MVT::i32);
+
+ SDValue SubregHI = CurDAG->getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
+ MVT::i32, SDValue(Reg, 0));
+
+ SDValue SubregLO = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
+ MVT::i32, SDValue(Reg, 0));
+
+ // Clear/set/toggle hi or lo registers depending on the bit position.
+ if (SubValueVT != MVT::f32 && bitpos < 32) { + SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, + SubregLO, SDVal); + const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx, + SDValue(Result0, 0), SubregLoIdx }; + Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + dl, ValueVT, Ops); + } else { + if (Opc != ISD::FABS && Opc != ISD::FNEG) + SDVal = CurDAG->getTargetConstant(bitpos - 32, dl, MVT::i32); + SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, + SubregHI, SDVal); + const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx, + SubregLO, SubregLoIdx }; + Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + dl, ValueVT, Ops); + } + } + + ReplaceUses(N, Result); + return Result; +} + + +SDNode *HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { + MachineFrameInfo *MFI = MF->getFrameInfo(); + const HexagonFrameLowering *HFI = HST->getFrameLowering(); + int FX = cast<FrameIndexSDNode>(N)->getIndex(); + unsigned StkA = HFI->getStackAlignment(); + unsigned MaxA = MFI->getMaxAlignment(); + SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32); + SDLoc DL(N); + SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); + SDNode *R = 0; + + // Use TFR_FI when: + // - the object is fixed, or + // - there are no objects with higher-than-default alignment, or + // - there are no dynamically allocated objects. + // Otherwise, use TFR_FIA. + if (FX < 0 || MaxA <= StkA || !MFI->hasVarSizedObjects()) { + R = CurDAG->getMachineNode(Hexagon::TFR_FI, DL, MVT::i32, FI, Zero); + } else { + auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>(); + unsigned AR = HMFI.getStackAlignBaseVReg(); + SDValue CH = CurDAG->getEntryNode(); + SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero }; + R = CurDAG->getMachineNode(Hexagon::TFR_FIA, DL, MVT::i32, Ops); + } + + if (N->getHasDebugValue()) + CurDAG->TransferDbgValues(SDValue(N, 0), SDValue(R, 0)); + return R; +} + SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) { @@ -1350,7 +1259,6 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { return nullptr; // Already selected. } - switch (N->getOpcode()) { case ISD::Constant: return SelectConstant(N); @@ -1358,6 +1266,9 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { case ISD::ConstantFP: return SelectConstantFP(N); + case ISD::FrameIndex: + return SelectFrameIndex(N); + case ISD::ADD: return SelectAdd(N); @@ -1370,18 +1281,22 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { case ISD::STORE: return SelectStore(N); - case ISD::SELECT: - return SelectSelect(N); - - case ISD::TRUNCATE: - return SelectTruncate(N); - case ISD::MUL: return SelectMul(N); + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::FABS: + case ISD::FNEG: + return SelectBitOp(N); + case ISD::ZERO_EXTEND: return SelectZeroExtend(N); + case ISD::INTRINSIC_W_CHAIN: + return SelectIntrinsicWChain(N); + case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N); } @@ -1389,297 +1304,217 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } +bool HexagonDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector<SDValue> &OutOps) { + SDValue Inp = Op, Res; -// -// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way -// to define these instructions. 
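The SelectFrameIndex hunk above chooses between TFR_FI and TFR_FIA. A condensed model of that decision (the predicate mirrors the hunk's own condition; the free-standing function is only an illustration):

    // TFR_FI is enough when the object's offset from the frame is
    // statically known; otherwise the aligned base register set up by
    // ALIGNA is needed, hence TFR_FIA.
    static bool canUseSimpleFrameIndex(int FrameIdx, unsigned StackAlign,
                                       unsigned MaxObjAlign,
                                       bool HasVarSizedObjects) {
      // Fixed objects have negative indices; if no object is over-aligned
      // or there are no dynamic allocations, plain TFR_FI suffices.
      return FrameIdx < 0 || MaxObjAlign <= StackAlign || !HasVarSizedObjects;
    }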
-// -bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); + switch (ConstraintID) { + default: return true; + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_o: // Offsetable. + case InlineAsm::Constraint_v: // Not offsetable. + case InlineAsm::Constraint_m: // Memory. + if (SelectAddrFI(Inp, Res)) + OutOps.push_back(Res); + else + OutOps.push_back(Inp); + break; } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; -} - - -bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_0_Offset(Offset.getNode())); - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_0_Offset(Offset.getNode())); -} - - -bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_1_Offset(Offset.getNode())); - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_1_Offset(Offset.getNode())); -} - - -bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_2_Offset(Offset.getNode())); - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_2_Offset(Offset.getNode())); -} - -bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. 
- - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_0_Offset(Offset.getNode())); - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_0_Offset(Offset.getNode())); + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; } +void HexagonDAGToDAGISel::PreprocessISelDAG() { + SelectionDAG &DAG = *CurDAG; + std::vector<SDNode*> Nodes; + for (auto I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) + Nodes.push_back(I); + + // Simplify: (or (select c x 0) z) -> (select c (or x z) z) + // (or (select c 0 y) z) -> (select c z (or y z)) + // This may not be the right thing for all targets, so do it here. + for (auto I: Nodes) { + if (I->getOpcode() != ISD::OR) + continue; + + auto IsZero = [] (const SDValue &V) -> bool { + if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode())) + return SC->isNullValue(); + return false; + }; + auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool { + if (Op.getOpcode() != ISD::SELECT) + return false; + return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2)); + }; -bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_1_Offset(Offset.getNode())); + SDValue N0 = I->getOperand(0), N1 = I->getOperand(1); + EVT VT = I->getValueType(0); + bool SelN0 = IsSelect0(N0); + SDValue SOp = SelN0 ? N0 : N1; + SDValue VOp = SelN0 ? N1 : N0; + + if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) { + SDValue SC = SOp.getOperand(0); + SDValue SX = SOp.getOperand(1); + SDValue SY = SOp.getOperand(2); + SDLoc DLS = SOp; + if (IsZero(SY)) { + SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp); + SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp); + DAG.ReplaceAllUsesWith(I, NewSel.getNode()); + } else if (IsZero(SX)) { + SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp); + SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr); + DAG.ReplaceAllUsesWith(I, NewSel.getNode()); + } + } } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_1_Offset(Offset.getNode())); } - -bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. 
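The PreprocessISelDAG hunk above relies on a simple algebraic fact: OR with zero is the identity, so the OR can be distributed into both arms of a select whose other arm is zero. A minimal host-side check of the scalar identity (illustrative only, not part of the pass):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t x = 0x00F0u, z = 0x0F00u;
      for (bool c : {false, true}) {
        uint32_t lhs = (c ? x : 0u) | z;   // (or (select c x 0) z)
        uint32_t rhs = c ? (x | z) : z;    // (select c (or x z) z)
        assert(lhs == rhs);
      }
      return 0;
    }

The mirrored rule, (or (select c 0 y) z) -> (select c z (or y z)), follows by the same argument with the select arms swapped.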
- - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_2_Offset(Offset.getNode())); - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_2_Offset(Offset.getNode())); +void HexagonDAGToDAGISel::EmitFunctionEntryCode() { + auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget()); + auto &HFI = *HST.getFrameLowering(); + if (!HFI.needsAligna(*MF)) + return; + + MachineFrameInfo *MFI = MF->getFrameInfo(); + MachineBasicBlock *EntryBB = MF->begin(); + unsigned AR = FuncInfo->CreateReg(MVT::i32); + unsigned MaxA = MFI->getMaxAlignment(); + auto &HII = *HST.getInstrInfo(); + BuildMI(EntryBB, DebugLoc(), HII.get(Hexagon::ALIGNA), AR) + .addImm(MaxA); + MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR); } - -bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - - if (Addr.getOpcode() != ISD::ADD) { - return(SelectADDRriS11_2(Addr, Base, Offset)); - } - - return SelectADDRriS11_2(Addr, Base, Offset); +// Match a frame index that can be used in an addressing mode. +bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) { + if (N.getOpcode() != ISD::FrameIndex) + return false; + auto &HFI = *HST->getFrameLowering(); + MachineFrameInfo *MFI = MF->getFrameInfo(); + int FX = cast<FrameIndexSDNode>(N)->getIndex(); + if (!MFI->isFixedObjectIndex(FX) && HFI.needsAligna(*MF)) + return false; + R = CurDAG->getTargetFrameIndex(FX, MVT::i32); + return true; } - -bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_3_Offset(Offset.getNode())); - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_3_Offset(Offset.getNode())); +inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) { + return SelectGlobalAddress(N, R, false); } -bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, - SDValue &R2) { - if (Addr.getOpcode() == ISD::FrameIndex) return false; - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (Addr.getOpcode() == ISD::ADD) { - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) - if (isInt<13>(CN->getSExtValue())) - return false; // Let the reg+imm pattern catch this! - R1 = Addr.getOperand(0); - R2 = Addr.getOperand(1); - return true; - } - - R1 = Addr; - - return true; +inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) { + return SelectGlobalAddress(N, R, true); } - -// Handle generic address case. It is accessed from inlined asm =m constraints, -// which could have any kind of pointer. -bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr, - SDValue &Base, SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. 
- - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; +bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, + bool UseGP) { + switch (N.getOpcode()) { + case ISD::ADD: { + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + unsigned GAOpc = N0.getOpcode(); + if (UseGP && GAOpc != HexagonISD::CONST32_GP) + return false; + if (!UseGP && GAOpc != HexagonISD::CONST32) + return false; + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) { + SDValue Addr = N0.getOperand(0); + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) { + if (GA->getOpcode() == ISD::TargetGlobalAddress) { + uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue(); + R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const), + N.getValueType(), NewOff); + return true; + } + } + } + break; } - - if (Addr.getOpcode() == ISD::ADD) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; + case HexagonISD::CONST32: + // The operand(0) of CONST32 is TargetGlobalAddress, which is what we + // want in the instruction. + if (!UseGP) + R = N.getOperand(0); + return !UseGP; + case HexagonISD::CONST32_GP: + if (UseGP) + R = N.getOperand(0); + return UseGP; + default: + return false; } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; + return false; } - -bool HexagonDAGToDAGISel:: -SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, - std::vector<SDValue> &OutOps) { - SDValue Op0, Op1; - - switch (ConstraintCode) { - case 'o': // Offsetable. - case 'v': // Not offsetable. - default: return true; - case 'm': // Memory. - if (!SelectAddr(Op.getNode(), Op, Op0, Op1)) +bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, + unsigned FromBits, SDValue &Src) { + unsigned Opc = Val.getOpcode(); + switch (Opc) { + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: { + SDValue const &Op0 = Val.getOperand(0); + EVT T = Op0.getValueType(); + if (T.isInteger() && T.getSizeInBits() == FromBits) { + Src = Op0; return true; + } break; } - - OutOps.push_back(Op0); - OutOps.push_back(Op1); - return false; -} - -bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const { - unsigned UseCount = 0; - for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { - UseCount++; + case ISD::SIGN_EXTEND_INREG: + case ISD::AssertSext: + case ISD::AssertZext: + if (Val.getOperand(0).getValueType().isInteger()) { + VTSDNode *T = cast<VTSDNode>(Val.getOperand(1)); + if (T->getVT().getSizeInBits() == FromBits) { + Src = Val.getOperand(0); + return true; + } + } + break; + case ISD::AND: { + // Check if this is an AND with "FromBits" of lower bits set to 1. + uint64_t FromMask = (1 << FromBits) - 1; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) { + if (C->getZExtValue() == FromMask) { + Src = Val.getOperand(1); + return true; + } + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) { + if (C->getZExtValue() == FromMask) { + Src = Val.getOperand(0); + return true; + } + } + break; } - - return (UseCount <= 1); - -} - -//===--------------------------------------------------------------------===// -// Return 'true' if use count of the global address is below threshold. 
-//===--------------------------------------------------------------------===// -bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const { - assert(N->getOpcode() == ISD::TargetGlobalAddress && - "Expecting a target global address"); - - // Always try to fold the address. - if (TM.getOptLevel() == CodeGenOpt::Aggressive) - return true; - - GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); - DenseMap<const GlobalValue *, unsigned>::const_iterator GI = - GlobalAddressUseCountMap.find(GA->getGlobal()); - - if (GI == GlobalAddressUseCountMap.end()) - return false; - - return GI->second <= MaxNumOfUsesForConstExtenders; -} - -//===--------------------------------------------------------------------===// -// Return true if the non-GP-relative global address can be folded. -//===--------------------------------------------------------------------===// -inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) { - return foldGlobalAddressImpl(N, R, false); -} - -//===--------------------------------------------------------------------===// -// Return true if the GP-relative global address can be folded. -//===--------------------------------------------------------------------===// -inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) { - return foldGlobalAddressImpl(N, R, true); -} - -//===--------------------------------------------------------------------===// -// Fold offset of the global address if number of uses are below threshold. -//===--------------------------------------------------------------------===// -bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R, - bool ShouldLookForGP) { - if (N.getOpcode() == ISD::ADD) { - SDValue N0 = N.getOperand(0); - SDValue N1 = N.getOperand(1); - if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) || - (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1); - GlobalAddressSDNode *GA = - dyn_cast<GlobalAddressSDNode>(N0.getOperand(0)); - - if (Const && GA && - (GA->getOpcode() == ISD::TargetGlobalAddress)) { - if ((N0.getOpcode() == HexagonISD::CONST32) && - !hasNumUsesBelowThresGA(GA)) - return false; - R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), - SDLoc(Const), - N.getValueType(), - GA->getOffset() + - (uint64_t)Const->getSExtValue()); + case ISD::OR: + case ISD::XOR: { + // OR/XOR with the lower "FromBits" bits set to 0. 
+ uint64_t FromMask = (1 << FromBits) - 1; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) { + if ((C->getZExtValue() & FromMask) == 0) { + Src = Val.getOperand(1); + return true; + } + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) { + if ((C->getZExtValue() & FromMask) == 0) { + Src = Val.getOperand(0); return true; } } } + default: + break; + } return false; } diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index ef5d6b97fd6f..ed5676c1fbb6 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -43,11 +43,48 @@ using namespace llvm; static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden, - cl::desc("Control jump table emission on Hexagon target")); + cl::desc("Control jump table emission on Hexagon target")); + +static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon SDNode scheduling")); + +static cl::opt<bool> EnableFastMath("ffast-math", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Fast Math processing")); + +static cl::opt<int> MinimumJumpTables("minimum-jump-tables", + cl::Hidden, cl::ZeroOrMore, cl::init(5), + cl::desc("Set minimum jump tables")); + +static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy", + cl::Hidden, cl::ZeroOrMore, cl::init(6), + cl::desc("Max #stores to inline memcpy")); + +static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os", + cl::Hidden, cl::ZeroOrMore, cl::init(4), + cl::desc("Max #stores to inline memcpy")); + +static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove", + cl::Hidden, cl::ZeroOrMore, cl::init(6), + cl::desc("Max #stores to inline memmove")); + +static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os", + cl::Hidden, cl::ZeroOrMore, cl::init(4), + cl::desc("Max #stores to inline memmove")); + +static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset", + cl::Hidden, cl::ZeroOrMore, cl::init(8), + cl::desc("Max #stores to inline memset")); + +static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", + cl::Hidden, cl::ZeroOrMore, cl::init(4), + cl::desc("Max #stores to inline memset")); + namespace { class HexagonCCState : public CCState { - int NumNamedVarArgParams; + unsigned NumNamedVarArgParams; public: HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, @@ -56,7 +93,7 @@ public: : CCState(CC, isVarArg, MF, locs, C), NumNamedVarArgParams(NumNamedVarArgParams) {} - int getNumNamedVarArgParams() const { return NumNamedVarArgParams; } + unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; } }; } @@ -97,11 +134,7 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, ISD::ArgFlagsTy ArgFlags, CCState &State) { HexagonCCState &HState = static_cast<HexagonCCState &>(State); - // NumNamedVarArgParams can not be zero for a VarArg function. - assert((HState.getNumNamedVarArgParams() > 0) && - "NumNamedVarArgParams is not bigger than zero."); - - if ((int)ValNo < HState.getNumNamedVarArgParams()) { + if (ValNo < HState.getNumNamedVarArgParams()) { // Deal with named arguments. return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State); } @@ -111,9 +144,8 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, if (ArgFlags.isByVal()) { // If pass-by-value, the size allocated on stack is decided // by ArgFlags.getByValSize(), not by the size of LocVT. 
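     // The slot must also honor the aggregate's own alignment, which is why
     // the fixed 4-byte alignment below is replaced with getByValAlign().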
- assert ((ArgFlags.getByValSize() > 8) && - "ByValSize must be bigger than 8 bytes"); - ofst = State.AllocateStack(ArgFlags.getByValSize(), 4); + ofst = State.AllocateStack(ArgFlags.getByValSize(), + ArgFlags.getByValAlign()); State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); return false; } @@ -148,9 +180,8 @@ CC_Hexagon (unsigned ValNo, MVT ValVT, if (ArgFlags.isByVal()) { // Passed on stack. - assert ((ArgFlags.getByValSize() > 8) && - "ByValSize must be bigger than 8 bytes"); - unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 4); + unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), + ArgFlags.getByValAlign()); State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } @@ -164,6 +195,12 @@ CC_Hexagon (unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::ZExt; else LocInfo = CCValAssign::AExt; + } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) { + LocVT = MVT::i32; + LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; } if (LocVT == MVT::i32 || LocVT == MVT::f32) { @@ -188,7 +225,7 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, Hexagon::R5 }; - if (unsigned Reg = State.AllocateReg(RegList, 6)) { + if (unsigned Reg = State.AllocateReg(RegList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -213,7 +250,7 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, static const MCPhysReg RegList2[] = { Hexagon::R1, Hexagon::R3 }; - if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) { + if (unsigned Reg = State.AllocateReg(RegList1, RegList2)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -239,6 +276,12 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::ZExt; else LocInfo = CCValAssign::AExt; + } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) { + LocVT = MVT::i32; + LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; } if (LocVT == MVT::i32 || LocVT == MVT::f32) { @@ -301,9 +344,10 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, SDLoc dl) { - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, /*AlwaysInline=*/false, + /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); } @@ -351,8 +395,13 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps); } +bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { + // If either no tail call or told not to tail call at all, don't. + if (!CI->isTailCall() || HTM.Options.DisableTailCalls) + return false; - + return true; +} /// LowerCallResult - Lower the result values of an ISD::CALL into the /// appropriate copies out of appropriate physical registers. 
This assumes that @@ -404,8 +453,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; + bool doesNotReturn = CLI.DoesNotReturn; bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + MachineFunction &MF = DAG.getMachineFunction(); // Check for varargs. int NumNamedVarArgParams = -1; @@ -430,42 +481,39 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), NumNamedVarArgParams); - if (NumNamedVarArgParams > 0) + if (isVarArg) CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg); else CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon); + if (DAG.getTarget().Options.DisableTailCalls) + isTailCall = false; - if(isTailCall) { - bool StructAttrFlag = - DAG.getMachineFunction().getFunction()->hasStructRetAttr(); + if (isTailCall) { + bool StructAttrFlag = MF.getFunction()->hasStructRetAttr(); isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, IsStructRet, StructAttrFlag, Outs, OutVals, Ins, DAG); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i){ + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isMemLoc()) { isTailCall = false; break; } } - if (isTailCall) { - DEBUG(dbgs () << "Eligible for Tail Call\n"); - } else { - DEBUG(dbgs () << - "Argument must be passed on stack. Not eligible for Tail Call\n"); - } + DEBUG(dbgs() << (isTailCall ? "Eligible for Tail Call\n" + : "Argument must be passed on stack. " + "Not eligible for Tail Call\n")); } // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; - const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>( - DAG.getSubtarget().getRegisterInfo()); - SDValue StackPtr = - DAG.getCopyFromReg(Chain, dl, QRI->getStackRegister(), getPointerTy()); + auto &HRI = *Subtarget.getRegisterInfo(); + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), + getPointerTy()); // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -478,6 +526,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, default: // Loc info must be one of Full, SExt, ZExt, or AExt. llvm_unreachable("Unknown loc info!"); + case CCValAssign::BCvt: case CCValAssign::Full: break; case CCValAssign::SExt: @@ -493,41 +542,38 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (VA.isMemLoc()) { unsigned LocMemOffset = VA.getLocMemOffset(); - SDValue PtrOff = DAG.getConstant(LocMemOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - + SDValue MemAddr = DAG.getConstant(LocMemOffset, dl, + StackPtr.getValueType()); + MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr); if (Flags.isByVal()) { // The argument is a struct passed by value. According to LLVM, "Arg" // is is pointer. - MemOpChains.push_back(CreateCopyOfByValArgument(Arg, PtrOff, Chain, + MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain, Flags, DAG, dl)); } else { - // The argument is not passed by value. "Arg" is a buildin type. It is - // not a pointer. 
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(),false, false, - 0)); + MachinePointerInfo LocPI = MachinePointerInfo::getStack(LocMemOffset); + SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false, + false, 0); + MemOpChains.push_back(S); } continue; } // Arguments that can be passed on register must be kept at RegsToPass // vector. - if (VA.isRegLoc()) { + if (VA.isRegLoc()) RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } } // Transform all store nodes into one single node because all store // nodes are independent of each other. - if (!MemOpChains.empty()) { + if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); - } - if (!isTailCall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, - getPointerTy(), true), - dl); + if (!isTailCall) { + SDValue C = DAG.getConstant(NumBytes, dl, getPointerTy(), true); + Chain = DAG.getCALLSEQ_START(Chain, C, dl); + } // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. @@ -540,10 +586,9 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } - } - - // For tail calls lower the arguments to the 'real' stack slot. - if (isTailCall) { + } else { + // For tail calls lower the arguments to the 'real' stack slot. + // // Force all the incoming stack arguments to be loaded from the stack // before any new outgoing arguments are stored to the stack, because the // outgoing stack slots may alias the incoming argument stack slots, and @@ -558,7 +603,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } - InFlag =SDValue(); + InFlag = SDValue(); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every @@ -567,8 +612,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (flag_aligned_memcpy) { const char *MemcpyName = "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; - Callee = - DAG.getTargetExternalSymbol(MemcpyName, getPointerTy()); + Callee = DAG.getTargetExternalSymbol(MemcpyName, getPointerTy()); flag_aligned_memcpy = false; } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy()); @@ -590,19 +634,21 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); } - if (InFlag.getNode()) { + if (InFlag.getNode()) Ops.push_back(InFlag); - } - if (isTailCall) + if (isTailCall) { + MF.getFrameInfo()->setHasTailCall(); return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops); + } - Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops); + int OpCode = doesNotReturn ? HexagonISD::CALLv3nr : HexagonISD::CALLv3; + Chain = DAG.getNode(OpCode, dl, NodeTys, Ops); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. 
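   // (CALLSEQ_START above and CALLSEQ_END here bracket the outgoing-argument
   // area so frame lowering can compute the maximum call-frame size; the glue
   // result keeps the argument copies and the call itself in order.)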
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag, dl); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), + DAG.getIntPtrConstant(0, dl, true), InFlag, dl); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we @@ -616,7 +662,7 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT, SDValue &Offset, bool &isInc, SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD) - return false; + return false; if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) { isInc = (Ptr->getOpcode() == ISD::ADD); @@ -688,8 +734,7 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); MachineFunction &MF = DAG.getMachineFunction(); - HexagonMachineFunctionInfo *FuncInfo = - MF.getInfo<HexagonMachineFunctionInfo>(); + auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>(); switch (Node->getOpcode()) { case ISD::INLINEASM: { unsigned NumOps = Node->getNumOperands(); @@ -697,7 +742,7 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op, --NumOps; // Ignore the flag operand. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - if (FuncInfo->hasClobberLR()) + if (FuncInfo.hasClobberLR()) break; unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); @@ -720,11 +765,9 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op, cast<RegisterSDNode>(Node->getOperand(i))->getReg(); // Check it to be lr - const HexagonRegisterInfo *QRI = - static_cast<const HexagonRegisterInfo *>( - DAG.getSubtarget().getRegisterInfo()); + const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo(); if (Reg == QRI->getRARegister()) { - FuncInfo->setHasClobberLR(true); + FuncInfo.setHasClobberLR(true); break; } } @@ -765,10 +808,10 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock())); } - SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl, + SDValue JumpTableBase = DAG.getNode(HexagonISD::JT, dl, getPointerTy(), TargetJT); SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index, - DAG.getConstant(2, MVT::i32)); + DAG.getConstant(2, dl, MVT::i32)); SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase, ShiftIndex); SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress, @@ -783,44 +826,27 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); SDLoc dl(Op); - unsigned SPReg = getStackPointerRegisterToSaveRestore(); + ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align); + assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC"); - // Get a reference to the stack pointer. - SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32); + unsigned A = AlignConst->getSExtValue(); + auto &HFI = *Subtarget.getFrameLowering(); + // "Zero" means natural stack alignment. + if (A == 0) + A = HFI.getStackAlignment(); - // Subtract the dynamic size from the actual stack size to - // obtain the new stack size. 
- SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size); + DEBUG({ + dbgs () << LLVM_FUNCTION_NAME << " Align: " << A << " Size: "; + Size.getNode()->dump(&DAG); + dbgs() << "\n"; + }); - // - // For Hexagon, the outgoing memory arguments area should be on top of the - // alloca area on the stack i.e., the outgoing memory arguments should be - // at a lower address than the alloca area. Move the alloca area down the - // stack by adding back the space reserved for outgoing arguments to SP - // here. - // - // We do not know what the size of the outgoing args is at this point. - // So, we add a pseudo instruction ADJDYNALLOC that will adjust the - // stack pointer. We patch this instruction with the correct, known - // offset in emitPrologue(). - // - // Use a placeholder immediate (zero) for now. This will be patched up - // by emitPrologue(). - SDValue ArgAdjust = DAG.getNode(HexagonISD::ADJDYNALLOC, dl, - MVT::i32, - Sub, - DAG.getConstant(0, MVT::i32)); - - // The Sub result contains the new stack start address, so it - // must be placed in the stack pointer register. - const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>( - DAG.getSubtarget().getRegisterInfo()); - SDValue CopyChain = DAG.getCopyToReg(Chain, dl, QRI->getStackRegister(), Sub); - - SDValue Ops[2] = { ArgAdjust, CopyChain }; - return DAG.getMergeValues(Ops, dl); + SDValue AC = DAG.getConstant(A, dl, MVT::i32); + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + return DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC); } SDValue @@ -836,9 +862,7 @@ const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - HexagonMachineFunctionInfo *FuncInfo = - MF.getInfo<HexagonMachineFunctionInfo>(); - + auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>(); // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; @@ -875,7 +899,7 @@ const { RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); - } else if (RegVT == MVT::i64) { + } else if (RegVT == MVT::i64 || RegVT == MVT::f64) { unsigned VReg = RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); @@ -927,7 +951,7 @@ const { HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset(), true); - FuncInfo->setVarArgsFrameIndex(FrameIndex); + FuncInfo.setVarArgsFrameIndex(FrameIndex); } return Chain; @@ -946,6 +970,192 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { false, 0); } +// Creates a SPLAT instruction for a constant value VAL. +static SDValue createSplat(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue Val) { + if (VT.getSimpleVT() == MVT::v4i8) + return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val); + + if (VT.getSimpleVT() == MVT::v4i16) + return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val); + + return SDValue(); +} + +static bool isSExtFree(SDValue N) { + // A sign-extend of a truncate of a sign-extend is free. + if (N.getOpcode() == ISD::TRUNCATE && + N.getOperand(0).getOpcode() == ISD::AssertSext) + return true; + // We have sign-extended loads. 
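+  // (Hexagon's byte and halfword loads come in sign-extending forms, e.g.
+  //  "r0 = memb(r1)", so a sign_extend fed by a load costs nothing extra.)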
+ if (N.getOpcode() == ISD::LOAD) + return true; + return false; +} + +SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue InpVal = Op.getOperand(0); + if (isa<ConstantSDNode>(InpVal)) { + uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue(); + return DAG.getTargetConstant(countPopulation(V), dl, MVT::i64); + } + SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut); +} + +SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Cmp = Op.getOperand(2); + ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get(); + + EVT VT = Op.getValueType(); + EVT LHSVT = LHS.getValueType(); + EVT RHSVT = RHS.getValueType(); + + if (LHSVT == MVT::v2i16) { + assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC)); + unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS); + SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS); + SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp); + return SC; + } + + // Treat all other vector types as legal. + if (VT.isVector()) + return Op; + + // Equals and not equals should use sign-extend, not zero-extend, since + // we can represent small negative values in the compare instructions. + // The LLVM default is to use zero-extend arbitrarily in these cases. + if ((CC == ISD::SETEQ || CC == ISD::SETNE) && + (RHSVT == MVT::i8 || RHSVT == MVT::i16) && + (LHSVT == MVT::i8 || LHSVT == MVT::i16)) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS); + if (C && C->getAPIntValue().isNegative()) { + LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); + RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); + return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), + LHS, RHS, Op.getOperand(2)); + } + if (isSExtFree(LHS) || isSExtFree(RHS)) { + LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); + RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); + return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), + LHS, RHS, Op.getOperand(2)); + } + } + return SDValue(); +} + +SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) + const { + SDValue PredOp = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2); + EVT OpVT = Op1.getValueType(); + SDLoc DL(Op); + + if (OpVT == MVT::v2i16) { + SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1); + SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2); + SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2); + SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL); + return TR; + } + + return SDValue(); +} + +// Handle only specific vector loads. +SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + LoadSDNode *LoadNode = cast<LoadSDNode>(Op); + SDValue Chain = LoadNode->getChain(); + SDValue Ptr = Op.getOperand(1); + SDValue LoweredLoad; + SDValue Result; + SDValue Base = LoadNode->getBasePtr(); + ISD::LoadExtType Ext = LoadNode->getExtensionType(); + unsigned Alignment = LoadNode->getAlignment(); + SDValue LoadChain; + + if(Ext == ISD::NON_EXTLOAD) + Ext = ISD::ZEXTLOAD; + + if (VT == MVT::v4i16) { + if (Alignment == 2) { + SDValue Loads[4]; + // Base load. 
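+      // (The four halfwords h0..h3 are each widened to i32 and reassembled
+      //  below as lo = (h1 << 16) | h0 and hi = (h3 << 16) | h2, then
+      //  combined into the final i64 as COMBINE(hi, lo).)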
+ Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // Base+2 load. + SDValue Increment = DAG.getConstant(2, DL, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // SHL 16, then OR base and base+2. + SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32); + SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount); + SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]); + // Base + 4. + Increment = DAG.getConstant(4, DL, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // Base + 6. + Increment = DAG.getConstant(6, DL, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // SHL 16, then OR base+4 and base+6. + Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount); + SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]); + // Combine to i64. This could be optimised out later if we can + // affect reg allocation of this code. + Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2); + LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + Loads[0].getValue(1), Loads[1].getValue(1), + Loads[2].getValue(1), Loads[3].getValue(1)); + } else { + // Perform default type expansion. + Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(), + LoadNode->isVolatile(), LoadNode->isNonTemporal(), + LoadNode->isInvariant(), LoadNode->getAlignment()); + LoadChain = Result.getValue(1); + } + } else + llvm_unreachable("Custom lowering unsupported load"); + + Result = DAG.getNode(ISD::BITCAST, DL, VT, Result); + // Since we pretend to lower a load, we need the original chain + // info attached to the result. 
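+  // (For the underaligned case the TokenFactor above merges the chains of
+  //  the four halfword loads, so any operation ordered after the original
+  //  load stays ordered after all of its pieces.)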
+ SDValue Ops[] = { Result, LoadChain }; + + return DAG.getMergeValues(Ops, DL); +} + + SDValue HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT ValTy = Op.getValueType(); @@ -958,15 +1168,15 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { else Res = DAG.getTargetConstantPool(CP->getConstVal(), ValTy, CP->getAlignment()); - return DAG.getNode(HexagonISD::CONST32, dl, ValTy, Res); + return DAG.getNode(HexagonISD::CP, dl, ValTy, Res); } SDValue HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { - const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->setReturnAddressIsTaken(true); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MFI.setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); @@ -976,29 +1186,28 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); - SDValue Offset = DAG.getConstant(4, MVT::i32); + SDValue Offset = DAG.getConstant(4, dl, MVT::i32); return DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), MachinePointerInfo(), false, false, false, 0); } // Return LR, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32)); + unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32)); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } SDValue HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - const HexagonRegisterInfo *TRI = static_cast<const HexagonRegisterInfo *>( - DAG.getSubtarget().getRegisterInfo()); - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - MFI->setFrameAddressIsTaken(true); + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); + MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo(); + MFI.setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc dl(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, - TRI->getFrameRegister(), VT); + HRI.getFrameRegister(), VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo(), @@ -1021,15 +1230,29 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SDLoc dl(Op); Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); - const HexagonTargetObjectFile &TLOF = - static_cast<const HexagonTargetObjectFile &>(getObjFileLowering()); - if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { + const HexagonTargetObjectFile *TLOF = + static_cast<const HexagonTargetObjectFile *>( + getTargetMachine().getObjFileLowering()); + if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) { return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result); } return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result); } +// Specifies that for loads and stores VT can be promoted to PromotedLdStVT. 
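+// Promotion here is a same-width reinterpretation, roughly:
+//   %w = load i32, i32* %p             ; instead of load <4 x i8>
+//   %v = bitcast i32 %w to <4 x i8>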
+void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) { + if (VT != PromotedLdStVT) { + setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); + + setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::STORE, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); + } +} + SDValue HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); @@ -1042,465 +1265,971 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { // TargetLowering Implementation //===----------------------------------------------------------------------===// -HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine) - : TargetLowering(targetmachine), - TM(targetmachine) { +HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, + const HexagonSubtarget &STI) + : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)), + Subtarget(STI) { + bool IsV4 = !Subtarget.hasV5TOps(); + auto &HRI = *Subtarget.getRegisterInfo(); + + setPrefLoopAlignment(4); + setPrefFunctionAlignment(4); + setMinFunctionAlignment(2); + setInsertFencesForAtomic(false); + setExceptionPointerRegister(Hexagon::R0); + setExceptionSelectorRegister(Hexagon::R1); + setStackPointerRegisterToSaveRestore(HRI.getStackRegister()); + + if (EnableHexSDNodeSched) + setSchedulingPreference(Sched::VLIW); + else + setSchedulingPreference(Sched::Source); + + // Limits for inline expansion of memcpy/memmove + MaxStoresPerMemcpy = MaxStoresPerMemcpyCL; + MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL; + MaxStoresPerMemmove = MaxStoresPerMemmoveCL; + MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL; + MaxStoresPerMemset = MaxStoresPerMemsetCL; + MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL; - const HexagonSubtarget &Subtarget = TM.getSubtarget<HexagonSubtarget>(); + // + // Set up register classes. + // - // Set up the register classes. - addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass); - addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass); + addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa + addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa + addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba + addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass); if (Subtarget.hasV5TOps()) { addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass); addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass); } - addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass); + // + // Handling of scalar operations. 
+  //
+  // All operations default to "legal", except:
+  // - indexed loads and stores (pre-/post-incremented),
+  // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
+  //   ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
+  //   FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
+  //   FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
+  //   which default to "expand" for at least one type.
+
+  // Misc operations.
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // Default: expand
+  setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
+
+  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+  setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+  // Custom legalize GlobalAddress nodes into CONST32.
+  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
-  computeRegisterProperties();
+  // Hexagon needs to optimize cases with negative constants.
+  setOperationAction(ISD::SETCC, MVT::i8, Custom);
+  setOperationAction(ISD::SETCC, MVT::i16, Custom);
-  // Align loop entry
-  setPrefLoopAlignment(4);
+  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+  setOperationAction(ISD::VASTART, MVT::Other, Custom);
+  setOperationAction(ISD::VAEND, MVT::Other, Expand);
+  setOperationAction(ISD::VAARG, MVT::Other, Expand);
-  // Limits for inline expansion of memcpy/memmove
-  MaxStoresPerMemcpy = 6;
-  MaxStoresPerMemmove = 6;
+  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
-  //
-  // Library calls for unsupported operations
-  //
+  if (EmitJumpTables)
+    setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+  else
+    setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  // Increase jump tables cutover to 5, was 4.
+  setMinimumJumpTableEntries(MinimumJumpTables);
+
+  // Hexagon has instructions for add/sub with carry. The problem with
+  // modeling these instructions is that they produce 2 results: Rdd and Px.
+  // To model the update of Px, we will have to use Defs[p0..p3] which will
+  // cause any predicate live range to spill. So, we pretend we don't have
+  // these instructions.
+ setOperationAction(ISD::ADDE, MVT::i8, Expand); + setOperationAction(ISD::ADDE, MVT::i16, Expand); + setOperationAction(ISD::ADDE, MVT::i32, Expand); + setOperationAction(ISD::ADDE, MVT::i64, Expand); + setOperationAction(ISD::SUBE, MVT::i8, Expand); + setOperationAction(ISD::SUBE, MVT::i16, Expand); + setOperationAction(ISD::SUBE, MVT::i32, Expand); + setOperationAction(ISD::SUBE, MVT::i64, Expand); + setOperationAction(ISD::ADDC, MVT::i8, Expand); + setOperationAction(ISD::ADDC, MVT::i16, Expand); + setOperationAction(ISD::ADDC, MVT::i32, Expand); + setOperationAction(ISD::ADDC, MVT::i64, Expand); + setOperationAction(ISD::SUBC, MVT::i8, Expand); + setOperationAction(ISD::SUBC, MVT::i16, Expand); + setOperationAction(ISD::SUBC, MVT::i32, Expand); + setOperationAction(ISD::SUBC, MVT::i64, Expand); - setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf"); - setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf"); + // Only add and sub that detect overflow are the saturating ones. + for (MVT VT : MVT::integer_valuetypes()) { + setOperationAction(ISD::UADDO, VT, Expand); + setOperationAction(ISD::SADDO, VT, Expand); + setOperationAction(ISD::USUBO, VT, Expand); + setOperationAction(ISD::SSUBO, VT, Expand); + } - setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti"); - setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti"); + setOperationAction(ISD::CTLZ, MVT::i8, Promote); + setOperationAction(ISD::CTLZ, MVT::i16, Promote); + setOperationAction(ISD::CTTZ, MVT::i8, Promote); + setOperationAction(ISD::CTTZ, MVT::i16, Promote); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Promote); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Promote); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote); + + // In V5, popcount can count # of 1s in i64 but returns i32. + // On V4 it will be expanded (set later). + setOperationAction(ISD::CTPOP, MVT::i8, Promote); + setOperationAction(ISD::CTPOP, MVT::i16, Promote); + setOperationAction(ISD::CTPOP, MVT::i32, Promote); + setOperationAction(ISD::CTPOP, MVT::i64, Custom); + + // We custom lower i64 to i64 mul, so that it is not considered as a legal + // operation. There is a pattern that will match i64 mul and transform it + // to a series of instructions. + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + + for (unsigned IntExpOp : + {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, + ISD::ROTL, ISD::ROTR, ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS, + ISD::SRL_PARTS, ISD::SMUL_LOHI, ISD::UMUL_LOHI}) { + setOperationAction(IntExpOp, MVT::i32, Expand); + setOperationAction(IntExpOp, MVT::i64, Expand); + } - setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti"); - setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti"); + for (unsigned FPExpOp : + {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS, + ISD::FPOW, ISD::FCOPYSIGN}) { + setOperationAction(FPExpOp, MVT::f32, Expand); + setOperationAction(FPExpOp, MVT::f64, Expand); + } - setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3"); - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3"); - setOperationAction(ISD::SREM, MVT::i32, Expand); + // No extending loads from i32. 
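+  // (An extending load whose memory type is i32 is legalized into a plain
+  //  i32 load followed by a separate extend.)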
+ for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); + } + // Turn FP truncstore into trunc + store. + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // Turn FP extload into load/fextend. + for (MVT VT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); - setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3"); - setOperationAction(ISD::SDIV, MVT::i64, Expand); - setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3"); - setOperationAction(ISD::SREM, MVT::i64, Expand); + // Expand BR_CC and SELECT_CC for all integer and fp types. + for (MVT VT : MVT::integer_valuetypes()) { + setOperationAction(ISD::BR_CC, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + } + for (MVT VT : MVT::fp_valuetypes()) { + setOperationAction(ISD::BR_CC, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + } + setOperationAction(ISD::BR_CC, MVT::Other, Expand); - setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3"); - setOperationAction(ISD::UDIV, MVT::i32, Expand); + // + // Handling of vector operations. + // - setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3"); - setOperationAction(ISD::UDIV, MVT::i64, Expand); + // Custom lower v4i16 load only. Let v4i16 store to be + // promoted for now. + promoteLdStType(MVT::v4i8, MVT::i32); + promoteLdStType(MVT::v2i16, MVT::i32); + promoteLdStType(MVT::v8i8, MVT::i64); + promoteLdStType(MVT::v2i32, MVT::i64); + + setOperationAction(ISD::LOAD, MVT::v4i16, Custom); + setOperationAction(ISD::STORE, MVT::v4i16, Promote); + AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64); + AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64); + + // Set the action for vector operations to "expand", then override it with + // either "custom" or "legal" for specific cases. 
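+  // (The loop below also marks vector shifts as Custom, and the natively
+  //  supported short vector types further down are flipped back to Legal.)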
+ static unsigned VectExpOps[] = { + // Integer arithmetic: + ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV, + ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC, + ISD::SUBC, ISD::SADDO, ISD::UADDO, ISD::SSUBO, ISD::USUBO, + ISD::SMUL_LOHI, ISD::UMUL_LOHI, + // Logical/bit: + ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR, + ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::CTLZ_ZERO_UNDEF, + ISD::CTTZ_ZERO_UNDEF, + // Floating point arithmetic/math functions: + ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV, + ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN, + ISD::FCOS, ISD::FPOWI, ISD::FPOW, ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC, + ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR, + ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, + // Misc: + ISD::SELECT, ISD::ConstantPool, + // Vector: + ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR, + ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT, + ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR, + ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE + }; - setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3"); - setOperationAction(ISD::UREM, MVT::i32, Expand); + for (MVT VT : MVT::vector_valuetypes()) { + for (unsigned VectExpOp : VectExpOps) + setOperationAction(VectExpOp, VT, Expand); - setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3"); - setOperationAction(ISD::UREM, MVT::i64, Expand); + // Expand all extended loads and truncating stores: + for (MVT TargetVT : MVT::vector_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand); + setTruncStoreAction(VT, TargetVT, Expand); + } - setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3"); - setOperationAction(ISD::FDIV, MVT::f32, Expand); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + } - setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3"); - setOperationAction(ISD::FDIV, MVT::f64, Expand); + // Types natively supported: + for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1, + MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32, + MVT::v2i32, MVT::v1i64}) { + setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, NativeVT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, NativeVT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, NativeVT, Custom); + + setOperationAction(ISD::ADD, NativeVT, Legal); + setOperationAction(ISD::SUB, NativeVT, Legal); + setOperationAction(ISD::MUL, NativeVT, Legal); + setOperationAction(ISD::AND, NativeVT, Legal); + setOperationAction(ISD::OR, NativeVT, Legal); + setOperationAction(ISD::XOR, NativeVT, Legal); + } - setOperationAction(ISD::FSQRT, MVT::f32, Expand); - setOperationAction(ISD::FSQRT, MVT::f64, Expand); - setOperationAction(ISD::FSIN, MVT::f32, Expand); - setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::SETCC, MVT::v2i16, Custom); + setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); + // Subtarget-specific operation actions. + // if (Subtarget.hasV5TOps()) { - // Hexagon V5 Support. 
- setOperationAction(ISD::FADD, MVT::f32, Legal); - setOperationAction(ISD::FADD, MVT::f64, Legal); - setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); - setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal); - setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal); - setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal); - setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal); - - setCondCodeAction(ISD::SETOGE, MVT::f32, Legal); - setCondCodeAction(ISD::SETOGE, MVT::f64, Legal); - setCondCodeAction(ISD::SETUGE, MVT::f32, Legal); - setCondCodeAction(ISD::SETUGE, MVT::f64, Legal); - - setCondCodeAction(ISD::SETOGT, MVT::f32, Legal); - setCondCodeAction(ISD::SETOGT, MVT::f64, Legal); - setCondCodeAction(ISD::SETUGT, MVT::f32, Legal); - setCondCodeAction(ISD::SETUGT, MVT::f64, Legal); - - setCondCodeAction(ISD::SETOLE, MVT::f32, Legal); - setCondCodeAction(ISD::SETOLE, MVT::f64, Legal); - setCondCodeAction(ISD::SETOLT, MVT::f32, Legal); - setCondCodeAction(ISD::SETOLT, MVT::f64, Legal); - - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - - setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); - setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); - - setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); - setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FADD, MVT::f64, Expand); + setOperationAction(ISD::FSUB, MVT::f64, Expand); + setOperationAction(ISD::FMUL, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + } else { // V4 + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f64, Expand); + setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal); + setOperationAction(ISD::CTPOP, MVT::i8, Expand); + setOperationAction(ISD::CTPOP, MVT::i16, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, 
Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); - setOperationAction(ISD::FABS, MVT::f32, Legal); - setOperationAction(ISD::FABS, MVT::f64, Expand); + // Expand these operations for both f32 and f64: + for (unsigned FPExpOpV4 : + {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) { + setOperationAction(FPExpOpV4, MVT::f32, Expand); + setOperationAction(FPExpOpV4, MVT::f64, Expand); + } - setOperationAction(ISD::FNEG, MVT::f32, Legal); - setOperationAction(ISD::FNEG, MVT::f64, Expand); - } else { + for (ISD::CondCode FPExpCCV4 : + {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE, + ISD::SETUO, ISD::SETO}) { + setCondCodeAction(FPExpCCV4, MVT::f32, Expand); + setCondCodeAction(FPExpCCV4, MVT::f64, Expand); + } + } - // Expand fp<->uint. - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + // Handling of indexed loads/stores: default is "expand". + // + for (MVT LSXTy : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { + setIndexedLoadAction(ISD::POST_INC, LSXTy, Legal); + setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal); + } - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + computeRegisterProperties(&HRI); - setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf"); - setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf"); + // + // Library calls for unsupported operations + // + bool FastMath = EnableFastMath; - setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf"); - setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf"); + setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3"); + setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3"); + setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3"); + setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3"); + setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3"); + setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3"); + setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3"); + setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3"); - setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf"); - setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf"); + setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf"); + setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf"); + setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti"); + setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti"); + setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti"); + setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti"); - setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf"); - setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf"); + if (IsV4) { + // Handle single-precision floating point operations on V4. + if (FastMath) { + setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3"); + setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3"); + setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3"); + setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2"); + setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2"); + // Double-precision compares. 
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2"); + setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2"); + } else { + setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3"); + setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3"); + setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3"); + setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2"); + setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2"); + // Double-precision compares. + setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2"); + setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2"); + } + } + // This is the only fast library function for sqrtd. + if (FastMath) + setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2"); + + // Prefix is: nothing for "slow-math", + // "fast2_" for V4 fast-math and V5+ fast-math double-precision + // (actually, keep fast-math and fast-math2 separate for now) + if (FastMath) { + setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3"); + setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3"); + setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3"); + setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3"); + // Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok). + setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3"); + } else { + setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3"); + setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3"); + setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3"); + setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3"); + setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3"); + } + + if (Subtarget.hasV5TOps()) { + if (FastMath) + setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf"); + else + setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf"); + } else { + // V4 + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf"); + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf"); + setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf"); + setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf"); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf"); + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf"); + setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf"); + setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf"); setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi"); setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi"); - - setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi"); - setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi"); - setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi"); setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi"); - - setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3"); - setOperationAction(ISD::FADD, MVT::f64, Expand); - - setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3"); - setOperationAction(ISD::FADD, MVT::f32, Expand); - - setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2"); - setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand); - + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi"); + setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi"); + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi"); + setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi"); + setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2"); + setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2"); setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2"); - setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand); - setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2"); - setCondCodeAction(ISD::SETOEQ, MVT::f64, 
Expand); - setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2"); - setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); - setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2"); - setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); - - setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2"); - setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); - - setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2"); - setCondCodeAction(ISD::SETOGT, MVT::f64, Expand); + setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2"); + setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2"); + setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2"); + setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2"); + setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2"); + setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2"); + setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2"); + setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2"); + } - setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi"); - setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand); + // These cause problems when the shift amount is non-constant. + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); +} - setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi"); - setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand); - setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2"); - setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); +const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((HexagonISD::NodeType)Opcode) { + case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA"; + case HexagonISD::ARGEXTEND: return "HexagonISD::ARGEXTEND"; + case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT"; + case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL"; + case HexagonISD::BARRIER: return "HexagonISD::BARRIER"; + case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; + case HexagonISD::CALLR: return "HexagonISD::CALLR"; + case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr"; + case HexagonISD::CALLv3: return "HexagonISD::CALLv3"; + case HexagonISD::COMBINE: return "HexagonISD::COMBINE"; + case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; + case HexagonISD::CONST32: return "HexagonISD::CONST32"; + case HexagonISD::CP: return "HexagonISD::CP"; + case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH"; + case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; + case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU"; + case HexagonISD::EXTRACTURP: return "HexagonISD::EXTRACTURP"; + case HexagonISD::FCONST32: return "HexagonISD::FCONST32"; + case HexagonISD::INSERT: return "HexagonISD::INSERT"; + case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP"; + case HexagonISD::JT: return "HexagonISD::JT"; + case HexagonISD::PACKHL: return "HexagonISD::PACKHL"; + case HexagonISD::PIC_ADD: return "HexagonISD::PIC_ADD"; + case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT"; + case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; + case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB"; + case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH"; + case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB"; + case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH"; + case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; + case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ"; + case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT"; + case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU"; + case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ"; + case HexagonISD::VCMPHGT: return 
"HexagonISD::VCMPHGT"; + case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU"; + case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ"; + case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; + case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; + case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; + case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; + case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB"; + case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH"; + case HexagonISD::VSRAH: return "HexagonISD::VSRAH"; + case HexagonISD::VSRAW: return "HexagonISD::VSRAW"; + case HexagonISD::VSRLH: return "HexagonISD::VSRLH"; + case HexagonISD::VSRLW: return "HexagonISD::VSRLW"; + case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH"; + case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW"; + case HexagonISD::OP_END: break; + } + return nullptr; +} - setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2"); - setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); +bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { + EVT MTy1 = EVT::getEVT(Ty1); + EVT MTy2 = EVT::getEVT(Ty2); + if (!MTy1.isSimple() || !MTy2.isSimple()) + return false; + return (MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32); +} - setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2"); - setCondCodeAction(ISD::SETOLT, MVT::f64, Expand); +bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { + if (!VT1.isSimple() || !VT2.isSimple()) + return false; + return (VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32); +} - setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2"); - setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); +// shouldExpandBuildVectorWithShuffles +// Should we expand the build vector with shuffles? +bool +HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const { - setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3"); - setOperationAction(ISD::FMUL, MVT::f64, Expand); + // Hexagon vector shuffle operates on element sizes of bytes or halfwords + EVT EltVT = VT.getVectorElementType(); + int EltBits = EltVT.getSizeInBits(); + if ((EltBits != 8) && (EltBits != 16)) + return false; - setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3"); - setOperationAction(ISD::MUL, MVT::f32, Expand); + return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); +} - setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2"); - setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); +// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and +// V2 are the two vectors to select data from, V3 is the permutation. +static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDLoc dl(Op); + EVT VT = Op.getValueType(); - setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2"); + if (V2.getOpcode() == ISD::UNDEF) + V2 = V1; + + if (SVN->isSplat()) { + int Lane = SVN->getSplatIndex(); + if (Lane == -1) Lane = 0; + + // Test if V1 is a SCALAR_TO_VECTOR. + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) + return createSplat(DAG, dl, VT, V1.getOperand(0)); + + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR + // (and probably will turn into a SCALAR_TO_VECTOR once legalization + // reaches it). 
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && + !isa<ConstantSDNode>(V1.getOperand(0))) { + bool IsScalarToVector = true; + for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return createSplat(DAG, dl, VT, V1.getOperand(0)); + } + return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32)); + } - setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3"); - setOperationAction(ISD::SUB, MVT::f64, Expand); + // FIXME: We need to support more general vector shuffles. See + // below the comment from the ARM backend that deals in the general + // case with the vector shuffles. For now, let expand handle these. + return SDValue(); - setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3"); - setOperationAction(ISD::SUB, MVT::f32, Expand); + // If the shuffle is not directly supported and it has 4 elements, use + // the PerfectShuffle-generated table to synthesize it from other shuffles. +} - setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2"); - setOperationAction(ISD::FP_ROUND, MVT::f64, Expand); +// If BUILD_VECTOR has same base element repeated several times, +// report true. +static bool isCommonSplatElement(BuildVectorSDNode *BVN) { + unsigned NElts = BVN->getNumOperands(); + SDValue V0 = BVN->getOperand(0); - setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2"); - setCondCodeAction(ISD::SETUO, MVT::f64, Expand); + for (unsigned i = 1, e = NElts; i != e; ++i) { + if (BVN->getOperand(i) != V0) + return false; + } + return true; +} - setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2"); - setCondCodeAction(ISD::SETO, MVT::f64, Expand); +// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert +// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific +// <VT> = SHL/SRA/SRL <VT> by <IT/i32>. 
+static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) { + BuildVectorSDNode *BVN = 0; + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDValue V3; + SDLoc dl(Op); + EVT VT = Op.getValueType(); - setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2"); - setCondCodeAction(ISD::SETO, MVT::f32, Expand); + if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) && + isCommonSplatElement(BVN)) + V3 = V2; + else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) && + isCommonSplatElement(BVN)) + V3 = V1; + else + return SDValue(); - setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2"); - setCondCodeAction(ISD::SETUO, MVT::f32, Expand); + SDValue CommonSplat = BVN->getOperand(0); + SDValue Result; - setOperationAction(ISD::FABS, MVT::f32, Expand); - setOperationAction(ISD::FABS, MVT::f64, Expand); - setOperationAction(ISD::FNEG, MVT::f32, Expand); - setOperationAction(ISD::FNEG, MVT::f64, Expand); + if (VT.getSimpleVT() == MVT::v4i16) { + switch (Op.getOpcode()) { + case ISD::SRA: + Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat); + break; + case ISD::SHL: + Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat); + break; + case ISD::SRL: + Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat); + break; + default: + return SDValue(); + } + } else if (VT.getSimpleVT() == MVT::v2i32) { + switch (Op.getOpcode()) { + case ISD::SRA: + Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat); + break; + case ISD::SHL: + Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat); + break; + case ISD::SRL: + Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat); + break; + default: + return SDValue(); + } + } else { + return SDValue(); } - setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3"); - setOperationAction(ISD::SREM, MVT::i32, Expand); + return DAG.getNode(ISD::BITCAST, dl, VT, Result); +} - setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal); - setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal); - setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); - setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal); +SDValue +HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { + BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); + SDLoc dl(Op); + EVT VT = Op.getValueType(); - setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal); - setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal); - setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); - setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal); + unsigned Size = VT.getSizeInBits(); - setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); + // A vector larger than 64 bits cannot be represented in Hexagon. + // Expand will split the vector. + if (Size > 64) + return SDValue(); - // Turn FP extload into load/fextend. - for (MVT VT : MVT::fp_valuetypes()) - setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); - // Hexagon has a i1 sign extending load. - for (MVT VT : MVT::integer_valuetypes()) - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand); - // Turn FP truncstore into trunc + store. - setTruncStoreAction(MVT::f64, MVT::f32, Expand); + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned NElts = BVN->getNumOperands(); + + // Try to generate a SPLAT instruction. 
+ if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) && + (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, 0, true) && SplatBitSize <= 16)) { + unsigned SplatBits = APSplatBits.getZExtValue(); + int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >> + (32 - SplatBitSize)); + return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, dl, MVT::i32)); + } - // Custom legalize GlobalAddress nodes into CONST32. - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i8, Custom); - setOperationAction(ISD::BlockAddress, MVT::i32, Custom); - // Truncate action? - setOperationAction(ISD::TRUNCATE, MVT::i64, Expand); + // Try to generate COMBINE to build v2i32 vectors. + if (VT.getSimpleVT() == MVT::v2i32) { + SDValue V0 = BVN->getOperand(0); + SDValue V1 = BVN->getOperand(1); + + if (V0.getOpcode() == ISD::UNDEF) + V0 = DAG.getConstant(0, dl, MVT::i32); + if (V1.getOpcode() == ISD::UNDEF) + V1 = DAG.getConstant(0, dl, MVT::i32); + + ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(V0); + ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(V1); + // If the element isn't a constant, it is in a register: + // generate a COMBINE Register Register instruction. + if (!C0 || !C1) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0); + + // If one of the operands is an 8 bit integer constant, generate + // a COMBINE Immediate Immediate instruction. + if (isInt<8>(C0->getSExtValue()) || + isInt<8>(C1->getSExtValue())) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0); + } - // Hexagon doesn't have sext_inreg, replace them with shl/sra. - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + // Try to generate a S2_packhl to build v2i16 vectors. + if (VT.getSimpleVT() == MVT::v2i16) { + for (unsigned i = 0, e = NElts; i != e; ++i) { + if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(BVN->getOperand(i)); + // If the element isn't a constant, it is in a register: + // generate a S2_packhl instruction. + if (!Cst) { + SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16, + BVN->getOperand(1), BVN->getOperand(0)); + + return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16, + pack); + } + } + } - // Hexagon has no REM or DIVREM operations. - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + // In the general case, generate a CONST32 or a CONST64 for constant vectors, + // and insert_vector_elt for all the other cases. + uint64_t Res = 0; + unsigned EltSize = Size / NElts; + SDValue ConstVal; + uint64_t Mask = ~uint64_t(0ULL) >> (64 - EltSize); + bool HasNonConstantElements = false; + + for (unsigned i = 0, e = NElts; i != e; ++i) { + // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon's + // combine, const64, etc. are Big Endian. 
+ unsigned OpIdx = NElts - i - 1; + SDValue Operand = BVN->getOperand(OpIdx); + if (Operand.getOpcode() == ISD::UNDEF) + continue; - setOperationAction(ISD::BSWAP, MVT::i64, Expand); + int64_t Val = 0; + if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Operand)) + Val = Cst->getSExtValue(); + else + HasNonConstantElements = true; - // Lower SELECT_CC to SETCC and SELECT. - setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + Val &= Mask; + Res = (Res << EltSize) | Val; + } - if (Subtarget.hasV5TOps()) { + if (Size == 64) + ConstVal = DAG.getConstant(Res, dl, MVT::i64); + else + ConstVal = DAG.getConstant(Res, dl, MVT::i32); + + // When there are non constant operands, add them with INSERT_VECTOR_ELT to + // ConstVal, the constant part of the vector. + if (HasNonConstantElements) { + EVT EltVT = VT.getVectorElementType(); + SDValue Width = DAG.getConstant(EltVT.getSizeInBits(), dl, MVT::i64); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, dl, MVT::i64)); + + for (unsigned i = 0, e = NElts; i != e; ++i) { + // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon + // is Big Endian. + unsigned OpIdx = NElts - i - 1; + SDValue Operand = BVN->getOperand(OpIdx); + if (isa<ConstantSDNode>(Operand)) + // This operand is already in ConstVal. + continue; + + if (VT.getSizeInBits() == 64 && + Operand.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, dl, MVT::i32); + Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand); + } - // We need to make the operation type of SELECT node to be Custom, - // such that we don't go into the infinite loop of - // select -> setcc -> select_cc -> select loop. - setOperationAction(ISD::SELECT, MVT::f32, Custom); - setOperationAction(ISD::SELECT, MVT::f64, Custom); + SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + const SDValue Ops[] = {ConstVal, Operand, Combined}; - setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + if (VT.getSizeInBits() == 32) + ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops); + else + ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops); + } + } - } else { + return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal); +} - // Hexagon has no select or setcc: expand to SELECT_CC. - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); +SDValue +HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + unsigned NElts = Op.getNumOperands(); + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), dl, MVT::i64); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, dl, MVT::i64)); + SDValue ConstVal = DAG.getConstant(0, dl, MVT::i64); + + ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width); + ConstantSDNode *S = dyn_cast<ConstantSDNode>(Shifted); + + if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) { + if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) { + // We are trying to concat two v2i16 to a single v4i16. 
+ SDValue Vec0 = Op.getOperand(1);
+ SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+ }
 }
- if (EmitJumpTables) {
- setOperationAction(ISD::BR_JT, MVT::Other, Custom);
- } else {
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) {
+ if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
+ // We are trying to concat two v4i8 to a single v8i8.
+ SDValue Vec0 = Op.getOperand(1);
+ SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+ }
 }
- // Increase jump tables cutover to 5, was 4.
- setMinimumJumpTableEntries(5);
-
- setOperationAction(ISD::BR_CC, MVT::f32, Expand);
- setOperationAction(ISD::BR_CC, MVT::f64, Expand);
- setOperationAction(ISD::BR_CC, MVT::i1, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Expand);
- setOperationAction(ISD::BR_CC, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = Op.getOperand(OpIdx);
- setOperationAction(ISD::FSIN, MVT::f64, Expand);
- setOperationAction(ISD::FCOS, MVT::f64, Expand);
- setOperationAction(ISD::FREM, MVT::f64, Expand);
- setOperationAction(ISD::FSIN, MVT::f32, Expand);
- setOperationAction(ISD::FCOS, MVT::f32, Expand);
- setOperationAction(ISD::FREM, MVT::f32, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
-
- // In V4, we have double word add/sub with carry. The problem with
- // modelling this instruction is that it produces 2 results - Rdd and Px.
- // To model update of Px, we will have to use Defs[p0..p3] which will
- // cause any predicate live range to spill. So, we pretend we don't
- // have these instructions.
- setOperationAction(ISD::ADDE, MVT::i8, Expand); - setOperationAction(ISD::ADDE, MVT::i16, Expand); - setOperationAction(ISD::ADDE, MVT::i32, Expand); - setOperationAction(ISD::ADDE, MVT::i64, Expand); - setOperationAction(ISD::SUBE, MVT::i8, Expand); - setOperationAction(ISD::SUBE, MVT::i16, Expand); - setOperationAction(ISD::SUBE, MVT::i32, Expand); - setOperationAction(ISD::SUBE, MVT::i64, Expand); - setOperationAction(ISD::ADDC, MVT::i8, Expand); - setOperationAction(ISD::ADDC, MVT::i16, Expand); - setOperationAction(ISD::ADDC, MVT::i32, Expand); - setOperationAction(ISD::ADDC, MVT::i64, Expand); - setOperationAction(ISD::SUBC, MVT::i8, Expand); - setOperationAction(ISD::SUBC, MVT::i16, Expand); - setOperationAction(ISD::SUBC, MVT::i32, Expand); - setOperationAction(ISD::SUBC, MVT::i64, Expand); - - setOperationAction(ISD::CTPOP, MVT::i32, Expand); - setOperationAction(ISD::CTPOP, MVT::i64, Expand); - setOperationAction(ISD::CTTZ, MVT::i32, Expand); - setOperationAction(ISD::CTTZ, MVT::i64, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::CTLZ, MVT::i32, Expand); - setOperationAction(ISD::CTLZ, MVT::i64, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::ROTL, MVT::i32, Expand); - setOperationAction(ISD::ROTR, MVT::i32, Expand); - setOperationAction(ISD::BSWAP, MVT::i32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - setOperationAction(ISD::FPOW, MVT::f64, Expand); - setOperationAction(ISD::FPOW, MVT::f32, Expand); - - setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); - - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + if (VT.getSizeInBits() == 64 && + Operand.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, dl, MVT::i32); + Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand); + } - setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); + SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + const SDValue Ops[] = {ConstVal, Operand, Combined}; - if (Subtarget.isSubtargetV2()) { - setExceptionPointerRegister(Hexagon::R20); - setExceptionSelectorRegister(Hexagon::R21); - } else { - setExceptionPointerRegister(Hexagon::R0); - setExceptionSelectorRegister(Hexagon::R1); + if (VT.getSizeInBits() == 32) + ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops); + else + ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops); } - // VASTART needs to be custom lowered to use the VarArgsFrameIndex. - setOperationAction(ISD::VASTART, MVT::Other, Custom); + return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal); +} - // Use the default implementation. 
- setOperationAction(ISD::VAARG, MVT::Other, Expand); - setOperationAction(ISD::VACOPY, MVT::Other, Expand); - setOperationAction(ISD::VAEND, MVT::Other, Expand); - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); +SDValue +HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int VTN = VT.isVector() ? VT.getVectorNumElements() : 1; + SDLoc dl(Op); + SDValue Idx = Op.getOperand(1); + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + int EltSize = EltVT.getSizeInBits(); + SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT ? + EltSize : VTN * EltSize, dl, MVT::i64); + + // Constant element number. + if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Idx)) { + uint64_t X = CI->getZExtValue(); + SDValue Offset = DAG.getConstant(X * EltSize, dl, MVT::i32); + const SDValue Ops[] = {Vec, Width, Offset}; + + ConstantSDNode *CW = dyn_cast<ConstantSDNode>(Width); + assert(CW && "Non constant width in LowerEXTRACT_VECTOR"); + + SDValue N; + MVT SVT = VecVT.getSimpleVT(); + uint64_t W = CW->getZExtValue(); + + if (W == 32) { + // Translate this node into EXTRACT_SUBREG. + unsigned Subreg = (X == 0) ? Hexagon::subreg_loreg : 0; + + if (X == 0) + Subreg = Hexagon::subreg_loreg; + else if (SVT == MVT::v2i32 && X == 1) + Subreg = Hexagon::subreg_hireg; + else if (SVT == MVT::v4i16 && X == 2) + Subreg = Hexagon::subreg_hireg; + else if (SVT == MVT::v8i8 && X == 4) + Subreg = Hexagon::subreg_hireg; + else + llvm_unreachable("Bad offset"); + N = DAG.getTargetExtractSubreg(Subreg, dl, MVT::i32, Vec); + + } else if (VecVT.getSizeInBits() == 32) { + N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i32, Ops); + } else { + N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i64, Ops); + if (VT.getSizeInBits() == 32) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N); + } - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); - setOperationAction(ISD::INLINEASM, MVT::Other, Custom); + return DAG.getNode(ISD::BITCAST, dl, VT, N); + } - setMinFunctionAlignment(2); + // Variable element number. + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx, + DAG.getConstant(EltSize, dl, MVT::i32)); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, dl, MVT::i64)); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); - // Needed for DYNAMIC_STACKALLOC expansion. 
- const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>( - TM.getSubtargetImpl()->getRegisterInfo()); - setStackPointerRegisterToSaveRestore(QRI->getStackRegister()); - setSchedulingPreference(Sched::VLIW); -} + const SDValue Ops[] = {Vec, Combined}; -const char* -HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: return nullptr; - case HexagonISD::CONST32: return "HexagonISD::CONST32"; - case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; - case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real"; - case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC"; - case HexagonISD::CMPICC: return "HexagonISD::CMPICC"; - case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC"; - case HexagonISD::BRICC: return "HexagonISD::BRICC"; - case HexagonISD::BRFCC: return "HexagonISD::BRFCC"; - case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC"; - case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC"; - case HexagonISD::Hi: return "HexagonISD::Hi"; - case HexagonISD::Lo: return "HexagonISD::Lo"; - case HexagonISD::FTOI: return "HexagonISD::FTOI"; - case HexagonISD::ITOF: return "HexagonISD::ITOF"; - case HexagonISD::CALL: return "HexagonISD::CALL"; - case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; - case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; - case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; - case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; + SDValue N; + if (VecVT.getSizeInBits() == 32) { + N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i32, Ops); + } else { + N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i64, Ops); + if (VT.getSizeInBits() == 32) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N); } + return DAG.getNode(ISD::BITCAST, dl, VT, N); } -bool -HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { - EVT MTy1 = EVT::getEVT(Ty1); - EVT MTy2 = EVT::getEVT(Ty2); - if (!MTy1.isSimple() || !MTy2.isSimple()) { - return false; +SDValue +HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int VTN = VT.isVector() ? VT.getVectorNumElements() : 1; + SDLoc dl(Op); + SDValue Vec = Op.getOperand(0); + SDValue Val = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + int EltSize = EltVT.getSizeInBits(); + SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::INSERT_VECTOR_ELT ? + EltSize : VTN * EltSize, dl, MVT::i64); + + if (ConstantSDNode *C = cast<ConstantSDNode>(Idx)) { + SDValue Offset = DAG.getConstant(C->getSExtValue() * EltSize, dl, MVT::i32); + const SDValue Ops[] = {Vec, Val, Width, Offset}; + + SDValue N; + if (VT.getSizeInBits() == 32) + N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, Ops); + else + N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, Ops); + + return DAG.getNode(ISD::BITCAST, dl, VT, N); } - return ((MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32)); -} -bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { - if (!VT1.isSimple() || !VT2.isSimple()) { - return false; + // Variable element number. 
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx, + DAG.getConstant(EltSize, dl, MVT::i32)); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, dl, MVT::i64)); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + + if (VT.getSizeInBits() == 64 && + Val.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, dl, MVT::i32); + Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val); } - return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32)); + + const SDValue Ops[] = {Vec, Val, Combined}; + + SDValue N; + if (VT.getSizeInBits() == 32) + N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops); + else + N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops); + + return DAG.getNode(ISD::BITCAST, dl, VT, N); } bool @@ -1532,7 +2261,7 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), DAG.getRegister(Hexagon::R30, getPointerTy()), - DAG.getIntPtrConstant(4)); + DAG.getIntPtrConstant(4, dl)); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset); @@ -1545,43 +2274,54 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - default: llvm_unreachable("Should not custom lower this!"); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); - // Frame & Return address. Currently unimplemented. - case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - case ISD::GlobalTLSAddress: - llvm_unreachable("TLS not implemented for Hexagon."); - case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); - case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG); - case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::BR_JT: return LowerBR_JT(Op, DAG); - - case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); - case ISD::SELECT: return Op; - case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::INLINEASM: return LowerINLINEASM(Op, DAG); - + unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: +#ifndef NDEBUG + Op.getNode()->dumpr(&DAG); + if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END) + errs() << "Check for a non-legal type in this operation\n"; +#endif + llvm_unreachable("Should not custom lower this!"); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::INSERT_SUBVECTOR: return LowerINSERT_VECTOR(Op, DAG); + case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR(Op, DAG); + case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_VECTOR(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR(Op, DAG); + case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SRA: + case ISD::SHL: + case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); + // Frame & Return address. Currently unimplemented. 
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ // Custom lower some vector loads.
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VSELECT: return LowerVSELECT(Op, DAG);
+ case ISD::CTPOP: return LowerCTPOP(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
 }
 }
-
-
-//===----------------------------------------------------------------------===//
-// Hexagon Scheduler Hooks
-//===----------------------------------------------------------------------===//
 MachineBasicBlock *
 HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 MachineBasicBlock *BB)
-const {
+ const {
 switch (MI->getOpcode()) {
- case Hexagon::ADJDYNALLOC: {
+ case Hexagon::ALLOCA: {
 MachineFunction *MF = BB->getParent();
- HexagonMachineFunctionInfo *FuncInfo =
- MF->getInfo<HexagonMachineFunctionInfo>();
+ auto *FuncInfo = MF->getInfo<HexagonMachineFunctionInfo>();
 FuncInfo->addAllocaAdjustInst(MI);
 return BB;
 }
@@ -1593,10 +2333,10 @@ const {
 // Inline Assembly Support
 //===----------------------------------------------------------------------===//
-std::pair<unsigned, const TargetRegisterClass*>
-HexagonTargetLowering::getRegForInlineAsmConstraint(const
- std::string &Constraint,
- MVT VT) const {
+std::pair<unsigned, const TargetRegisterClass *>
+HexagonTargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, const std::string &Constraint,
+ MVT VT) const {
 if (Constraint.size() == 1) {
 switch (Constraint[0]) {
 case 'r': // R0-R31
@@ -1617,14 +2357,14 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const
 }
 }
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 }
 /// isFPImmLegal - Returns true if the target can instruction select the
 /// specified FP immediate natively. If false, the legalizer will
 /// materialize the FP immediate as a load from a constant pool.
 bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- return TM.getSubtarget<HexagonSubtarget>().hasV5TOps();
+ return Subtarget.hasV5TOps();
 }
 /// isLegalAddressingMode - Return true if the addressing mode represented by
@@ -1632,14 +2372,12 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
 bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
 Type *Ty) const {
 // Allows a sign-extended 11-bit immediate field.
- if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) {
+ if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1)
 return false;
- }
 // No global is ever allowed as a base.
- if (AM.BaseGV) { + if (AM.BaseGV) return false; - } int Scale = AM.Scale; if (Scale < 0) Scale = -Scale; diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index d03b1b8d9f4a..584c2c57c7ca 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -26,127 +26,145 @@ namespace llvm { bool isPositiveHalfWord(SDNode *N); namespace HexagonISD { - enum { - FIRST_NUMBER = ISD::BUILTIN_OP_END, + enum NodeType : unsigned { + OP_BEGIN = ISD::BUILTIN_OP_END, - CONST32, + CONST32 = OP_BEGIN, CONST32_GP, // For marking data present in GP. - CONST32_Int_Real, FCONST32, - SETCC, - ADJDYNALLOC, + ALLOCA, ARGEXTEND, - CMPICC, // Compare two GPR operands, set icc. - CMPFCC, // Compare two FP operands, set fcc. - BRICC, // Branch to dest on icc condition - BRFCC, // Branch to dest on fcc condition - SELECT_ICC, // Select between two values using the current ICC flags. - SELECT_FCC, // Select between two values using the current FCC flags. + PIC_ADD, + AT_GOT, + AT_PCREL, - Hi, Lo, // Hi/Lo operations, typically on a global address. + CALLv3, // A V3+ call instruction. + CALLv3nr, // A V3+ call instruction that doesn't return. + CALLR, - FTOI, // FP to Int within a FP register. - ITOF, // Int to FP within a FP register. - - CALL, // A call instruction. RET_FLAG, // Return with a flag operand. - BR_JT, // Jump table. - BARRIER, // Memory barrier + BR_JT, // Branch through jump table. + BARRIER, // Memory barrier. + JT, // Jump table. + CP, // Constant pool. + POPCOUNT, COMBINE, - WrapperJT, - WrapperCP, - WrapperCombineII, - WrapperCombineRR, - WrapperCombineRI_V4, - WrapperCombineIR_V4, - WrapperPackhl, - WrapperSplatB, - WrapperSplatH, - WrapperShuffEB, - WrapperShuffEH, - WrapperShuffOB, - WrapperShuffOH, + PACKHL, + VSPLATB, + VSPLATH, + SHUFFEB, + SHUFFEH, + SHUFFOB, + SHUFFOH, + VSXTBH, + VSXTBW, + VSRAW, + VSRAH, + VSRLW, + VSRLH, + VSHLW, + VSHLH, + VCMPBEQ, + VCMPBGT, + VCMPBGTU, + VCMPHEQ, + VCMPHGT, + VCMPHGTU, + VCMPWEQ, + VCMPWGT, + VCMPWGTU, + + INSERT, + INSERTRP, + EXTRACTU, + EXTRACTURP, TC_RETURN, EH_RETURN, - DCFETCH + DCFETCH, + + OP_END }; } + class HexagonSubtarget; + class HexagonTargetLowering : public TargetLowering { int VarArgsFrameOffset; // Frame offset to start of varargs area. - bool CanReturnSmallStruct(const Function* CalleeFn, - unsigned& RetSize) const; + bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize) + const; + void promoteLdStType(EVT VT, EVT PromotedLdStVT); + const HexagonTargetMachine &HTM; + const HexagonSubtarget &Subtarget; public: - const TargetMachine &TM; - explicit HexagonTargetLowering(const TargetMachine &targetmachine); + explicit HexagonTargetLowering(const TargetMachine &TM, + const HexagonSubtarget &ST); /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. 
- bool - IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool isVarArg, - bool isCalleeStructRet, - bool isCallerStructRet, - const - SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - SelectionDAG& DAG) const; + bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, + bool isCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; bool isTruncateFree(Type *Ty1, Type *Ty2) const override; bool isTruncateFree(EVT VT1, EVT VT2) const override; bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + // Should we expand the build vector with shuffles? + bool shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const override; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; const char *getTargetNodeName(unsigned Opcode) const override; - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override; SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const override; - + SmallVectorImpl<SDValue> &InVals) const override; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals, - const SmallVectorImpl<SDValue> &OutVals, - SDValue Callee) const; - + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, + const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const; + + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const 
SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const override;
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
+ SelectionDAG &DAG) const override;
- MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const override;
+ bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const override;
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
 EVT getSetCCResultType(LLVMContext &C, EVT VT) const override {
 if (!VT.isVector())
 return MVT::i1;
@@ -159,10 +177,20 @@ bool isPositiveHalfWord(SDNode *N);
 ISD::MemIndexedMode &AM,
 SelectionDAG &DAG) const override;
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
 MVT VT) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "v")
+ return InlineAsm::Constraint_v;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
 // Intrinsics
 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
 /// isLegalAddressingMode - Return true if the addressing mode represented
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 8373652c8f64..36a7e9f642c6 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -28,20 +28,12 @@ def TypeXTYPE : IType<8>;
 def TypeENDLOOP: IType<31>;
 // Maintain list of valid subtargets for each instruction.
-class SubTarget<bits<4> value> {
- bits<4> Value = value;
+class SubTarget<bits<6> value> {
+ bits<6> Value = value;
 }
-def HasV2SubT : SubTarget<0xf>;
-def HasV2SubTOnly : SubTarget<0x1>;
-def NoV2SubT : SubTarget<0x0>;
-def HasV3SubT : SubTarget<0xe>;
-def HasV3SubTOnly : SubTarget<0x2>;
-def NoV3SubT : SubTarget<0x1>;
-def HasV4SubT : SubTarget<0xc>;
-def NoV4SubT : SubTarget<0x3>;
-def HasV5SubT : SubTarget<0x8>;
-def NoV5SubT : SubTarget<0x7>;
+def HasAnySubT : SubTarget<0x3f>; // 111111
+def HasV5SubT : SubTarget<0x3e>; // 111110
 // Addressing modes for load/store instructions
 class AddrModeType<bits<3> value> {
@@ -56,8 +48,8 @@ def BaseLongOffset : AddrModeType<4>; // Indirect with long offset
 def BaseRegOffset : AddrModeType<5>; // Indirect with register offset
 def PostInc : AddrModeType<6>; // Post increment addressing mode
-class MemAccessSize<bits<3> value> {
- bits<3> Value = value;
+class MemAccessSize<bits<4> value> {
+ bits<4> Value = value;
 }
 def NoMemAccess : MemAccessSize<0>;// Not a memory access instruction.
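The hunk above collapses the per-version subtarget table into two entries while widening the mask from four to six bits: HasAnySubT (0x3f, binary 111111) marks an instruction valid on every subtarget, and HasV5SubT (0x3e, binary 111110) on every subtarget except the one mapped to the lowest bit. A minimal sketch of the resulting validity check, assuming one bit per subtarget; the helper name and the bit assignment are illustrative only, the authoritative encodings live in MCTargetDesc/HexagonBaseInfo.h:

#include <cstdint>

// Illustrative values copied from the .td above; not part of the patch.
enum SubTargetMask : uint64_t {
  HasAnySubT = 0x3f, // 111111 - valid on every subtarget
  HasV5SubT  = 0x3e  // 111110 - lowest-numbered subtarget excluded
};

// With one bit per subtarget, a validity query is a single AND.
static bool isValidOnSubtarget(uint64_t ValidSubTargets, unsigned SubtargetBit) {
  return (ValidSubTargets & (1ull << SubtargetBit)) != 0;
}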
@@ -84,7 +76,7 @@ class OpcodeHexagon { class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr, InstrItinClass itin, IType type> - : Instruction, OpcodeHexagon { + : Instruction { let Namespace = "Hexagon"; dag OutOperandList = outs; @@ -92,18 +84,18 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, let AsmString = asmstr; let Pattern = pattern; let Constraints = cstr; - let Itinerary = itin;
- let Size = 4;
-
- // SoftFail is a field the disassembler can use to provide a way for
- // instructions to not match without killing the whole decode process. It is
- // mainly used for ARM, but Tablegen expects this field to exist or it fails
- // to build the decode table.
- field bits<32> SoftFail = 0;
-
- // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
-
- // Instruction type according to the ISA.
+ let Itinerary = itin; + let Size = 4; + + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. + field bits<32> SoftFail = 0; + + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** + + // Instruction type according to the ISA. IType Type = type; let TSFlags{4-0} = Type.Value; @@ -157,11 +149,11 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, bits<2> opExtentAlign = 0; let TSFlags{33-32} = opExtentAlign; // Alignment exponent before extending. - // If an instruction is valid on a subtarget (v2-v5), set the corresponding - // bit from validSubTargets. v2 is the least significant bit. + // If an instruction is valid on a subtarget, set the corresponding + // bit from validSubTargets. // By default, instruction is valid on all subtargets. - SubTarget validSubTargets = HasV2SubT; - let TSFlags{37-34} = validSubTargets.Value; + SubTarget validSubTargets = HasAnySubT; + let TSFlags{39-34} = validSubTargets.Value; // Addressing mode for load/store instructions. AddrModeType addrMode = NoAddrMode; @@ -169,7 +161,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, // Memory access size for mem access instructions (load/store) MemAccessSize accessSize = NoMemAccess; - let TSFlags{45-43} = accessSize.Value; + let TSFlags{46-43} = accessSize.Value; bits<1> isTaken = 0; let TSFlags {47} = isTaken; // Branch prediction. @@ -192,7 +184,6 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, ""); let PNewValue = !if(isPredicatedNew, "new", ""); let NValueST = !if(isNVStore, "true", "false"); - let isCodeGenOnly = 1; // *** Must match MCTargetDesc/HexagonBaseInfo.h *** } @@ -206,7 +197,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, let mayLoad = 1 in class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon; let mayLoad = 1 in class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], @@ -226,7 +217,7 @@ class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [], let mayLoad = 1 in class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon; // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. 
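With validSubTargets widened to TSFlags bits 39-34 and accessSize moved up to bits 46-43, any C++ reader of TSFlags has to shift by the new offsets. A hedged sketch of matching accessors (the function names are invented for illustration; as the comment above insists, the real layout must stay in sync with MCTargetDesc/HexagonBaseInfo.h):

#include <cstdint>

// Field offsets mirror the TSFlags assignments in the .td hunk above.
static unsigned getValidSubTargets(uint64_t TSFlags) {
  return unsigned(TSFlags >> 34) & 0x3f; // six bits at {39-34}, was four at {37-34}
}

static unsigned getMemAccessSize(uint64_t TSFlags) {
  return unsigned(TSFlags >> 43) & 0xf;  // four bits at {46-43}, was three at {45-43}
}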
@@ -234,7 +225,7 @@ class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], let mayStore = 1 in class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon; class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = ""> @@ -243,7 +234,7 @@ class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], let mayStore = 1 in class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon; // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. @@ -256,13 +247,14 @@ class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [], // In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1. class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>, + OpcodeHexagon; // ALU32 Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>, OpcodeHexagon; // ALU64 Instruction Class in V2/V3. // XTYPE Instruction Class in V4. @@ -270,7 +262,8 @@ class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], // Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4. class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> @@ -283,7 +276,8 @@ class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], // Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; // M Instruction Class in V2/V3. // XTYPE Instruction Class in V4. @@ -299,7 +293,8 @@ class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], // Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; // S Instruction Class in V2/V3. // XTYPE Instruction Class in V4. 
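One pattern runs through all of these hunks: OpcodeHexagon is no longer inherited by InstHexagon itself but mixed into each concrete format class. As a rough C++ analogy only (the real classes are TableGen records, and these struct names are invented), the refactoring looks like this:

#include <cstdint>

struct InstHexagonBase {   // shared fields: TSFlags, itinerary, size, ...
  uint64_t TSFlags = 0;
};

struct OpcodeHexagon {     // the standard 32-bit encoding template
  uint32_t Inst = 0;
};

// Ordinary formats opt back into the standard encoding via the mixin,
struct STInstLike : InstHexagonBase, OpcodeHexagon {};

// while a format with its own encoding, such as the InstDuplex class
// introduced in HexagonInstrFormatsV4.td below, omits the mixin and
// lays out Inst itself (IClass plus two 13-bit sub-instructions).
struct InstDuplexLike : InstHexagonBase {
  uint32_t Inst = 0;
};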
@@ -313,34 +308,37 @@ class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], // Definition of the instruction class NOT CHANGED. class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>, OpcodeHexagon; // JR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>, OpcodeHexagon; // CR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>, OpcodeHexagon; let isCodeGenOnly = 1, isPseudo = 1 in class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>, + OpcodeHexagon; let isCodeGenOnly = 1, isPseudo = 1 in class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = ""> - : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>, + OpcodeHexagon; let isCodeGenOnly = 1, isPseudo = 1 in class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr=""> - : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>, + OpcodeHexagon; //===----------------------------------------------------------------------===// // Instruction Classes Definitions - @@ -366,7 +364,6 @@ class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>; - // // ALU64 patterns. // diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index 5fec80bb570a..7f7b2c96dba7 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -17,10 +17,88 @@ // *** Must match BaseInfo.h *** //----------------------------------------------------------------------------// -def TypeMEMOP : IType<9>; -def TypeNV : IType<10>; +def TypeMEMOP : IType<9>; +def TypeNV : IType<10>; +def TypeDUPLEX : IType<11>; def TypeCOMPOUND : IType<12>; -def TypePREFIX : IType<30>; +def TypeAG_VX : IType<28>; +def TypeAG_VM : IType<29>; +def TypePREFIX : IType<30>; + +// Duplex Instruction Class Declaration +//===----------------------------------------------------------------------===// + +class OpcodeDuplex { + field bits<32> Inst = ?; // Default to an invalid insn. 
+ bits<4> IClass = 0; // ICLASS
+ bits<13> ISubHi = 0; // High sub-insn
+ bits<13> ISubLo = 0; // Low sub-insn
+
+ let Inst{31-29} = IClass{3-1};
+ let Inst{13} = IClass{0};
+ let Inst{15-14} = 0;
+ let Inst{28-16} = ISubHi;
+ let Inst{12-0} = ISubLo;
+}
+
+class InstDuplex<bits<4> iClass, list<dag> pattern = [],
+ string cstr = "">
+ : Instruction, OpcodeDuplex {
+ let Namespace = "Hexagon";
+ IType Type = TypeDUPLEX; // uses slot 0,1
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins);
+ let IClass = iClass;
+ let Constraints = cstr;
+ let Itinerary = DUPLEX;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ let TSFlags{4-0} = Type.Value;
+
+ // Predicated instructions.
+ bits<1> isPredicated = 0;
+ let TSFlags{6} = isPredicated;
+ bits<1> isPredicatedFalse = 0;
+ let TSFlags{7} = isPredicatedFalse;
+ bits<1> isPredicatedNew = 0;
+ let TSFlags{8} = isPredicatedNew;
+
+ // New-value insn helper fields.
+ bits<1> isNewValue = 0;
+ let TSFlags{9} = isNewValue; // New-value consumer insn.
+ bits<1> hasNewValue = 0;
+ let TSFlags{10} = hasNewValue; // New-value producer insn.
+ bits<3> opNewValue = 0;
+ let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ bits<1> isNVStorable = 0;
+ let TSFlags{14} = isNVStorable; // Store that can become new-value store.
+ bits<1> isNVStore = 0;
+ let TSFlags{15} = isNVStore; // New-value store insn.
+
+ // Immediate extender helper fields.
+ bits<1> isExtendable = 0;
+ let TSFlags{16} = isExtendable; // Insn may be extended.
+ bits<1> isExtended = 0;
+ let TSFlags{17} = isExtended; // Insn must be extended.
+ bits<3> opExtendable = 0;
+ let TSFlags{20-18} = opExtendable; // Which operand may be extended.
+ bits<1> isExtentSigned = 0;
+ let TSFlags{21} = isExtentSigned; // Signed or unsigned range.
+ bits<5> opExtentBits = 0;
+ let TSFlags{26-22} = opExtentBits; // Number of bits of range before extending.
+ bits<2> opExtentAlign = 0;
+ let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending.
+}
 //----------------------------------------------------------------------------//
 // Instruction Classes Definitions
@@ -31,7 +109,7 @@ def TypePREFIX : IType<30>;
 //
 class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
 string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>, OpcodeHexagon;
 class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
 string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
@@ -56,7 +134,8 @@ class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
 let mayLoad = 1, mayStore = 1 in
 class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
 string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>,
+ OpcodeHexagon;
 class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
 string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
@@ -65,8 +144,9 @@ class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
 let isCodeGenOnly = 1 in
 class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
 : InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123,
- TypePREFIX>;
+ TypePREFIX>, OpcodeHexagon;
 class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
 string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>,
+ OpcodeHexagon;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 5d962590a705..49b4517698d5 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -62,10 +62,8 @@ const int Hexagon_MEMB_AUTOINC_MIN = -8;
 void HexagonInstrInfo::anchor() {}
 HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
- : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
- RI(ST), Subtarget(ST) {
-}
-
+ : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+ RI(), Subtarget(ST) {}
 /// isLoadFromStackSlot - If the specified machine instruction is a direct
 /// load from a stack slot, return the virtual or physical register number of
@@ -117,68 +115,172 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
 return 0;
 }
+// Find the hardware loop instruction used to set up the specified loop.
+// On Hexagon, we have two instructions used to set up the hardware loop
+// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
+// to indicate the end of a loop.
+static MachineInstr *
+findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
+              SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
+  int LOOPi;
+  int LOOPr;
+  if (EndLoopOp == Hexagon::ENDLOOP0) {
+    LOOPi = Hexagon::J2_loop0i;
+    LOOPr = Hexagon::J2_loop0r;
+  } else { // EndLoopOp == Hexagon::ENDLOOP1
+    LOOPi = Hexagon::J2_loop1i;
+    LOOPr = Hexagon::J2_loop1r;
+  }
 
-unsigned
-HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
-                               MachineBasicBlock *FBB,
-                               const SmallVectorImpl<MachineOperand> &Cond,
-                               DebugLoc DL) const{
-
-  int BOpc = Hexagon::J2_jump;
-  int BccOpc = Hexagon::J2_jumpt;
-
-  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-
-  int regPos = 0;
-  // Check if ReverseBranchCondition has asked to reverse this branch
-  // If we want to reverse the branch an odd number of times, we want
-  // JMP_f.
-  if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
-    BccOpc = Hexagon::J2_jumpf;
-    regPos = 1;
+  // The loop set-up instruction will be in a predecessor block
+  for (MachineBasicBlock::pred_iterator PB = BB->pred_begin(),
+         PE = BB->pred_end(); PB != PE; ++PB) {
+    // If this has already been visited, skip it.
+    if (!Visited.insert(*PB).second)
+      continue;
+    if (*PB == BB)
+      continue;
+    for (MachineBasicBlock::reverse_instr_iterator I = (*PB)->instr_rbegin(),
+           E = (*PB)->instr_rend(); I != E; ++I) {
+      int Opc = I->getOpcode();
+      if (Opc == LOOPi || Opc == LOOPr)
+        return &*I;
+      // We've reached a different loop, which means the loop0 has been removed.
+      if (Opc == EndLoopOp)
+        return 0;
   }
+    // Check the predecessors for the LOOP instruction.
+    MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited);
+    if (loop)
+      return loop;
+  }
+  return 0;
+}
 
-  if (!FBB) {
-    if (Cond.empty()) {
-      // Due to a bug in TailMerging/CFG Optimization, we need to add a
-      // special case handling of a predicated jump followed by an
-      // unconditional jump. If not, Tail Merging and CFG Optimization go
-      // into an infinite loop.
-      MachineBasicBlock *NewTBB, *NewFBB;
-      SmallVector<MachineOperand, 4> Cond;
-      MachineInstr *Term = MBB.getFirstTerminator();
-      if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond,
-                                               false)) {
-        MachineBasicBlock *NextBB =
-          std::next(MachineFunction::iterator(&MBB));
-        if (NewTBB == NextBB) {
-          ReverseBranchCondition(Cond);
-          RemoveBranch(MBB);
-          return InsertBranch(MBB, TBB, nullptr, Cond, DL);
-        }
+unsigned HexagonInstrInfo::InsertBranch(
+    MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
+
+  Opcode_t BOpc = Hexagon::J2_jump;
+  Opcode_t BccOpc = Hexagon::J2_jumpt;
+
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  // Check if ReverseBranchCondition has asked to reverse this branch
+  // If we want to reverse the branch an odd number of times, we want
+  // J2_jumpf.
+  if (!Cond.empty() && Cond[0].isImm())
+    BccOpc = Cond[0].getImm();
+
+  if (!FBB) {
+    if (Cond.empty()) {
+      // Due to a bug in TailMerging/CFG Optimization, we need to add a
+      // special case handling of a predicated jump followed by an
+      // unconditional jump. If not, Tail Merging and CFG Optimization go
+      // into an infinite loop.
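// Sketch of the shape being special-cased here (illustrative): the block
// ends in
//     if (p0) jump .LBB_next    // predicated jump to the layout successor
// and we are asked to append an unconditional jump to TBB. Re-analyzing and
// reversing the condition yields "if (!p0) jump TBB" instead of stacking a
// second, unconditional branch that TailMerging would keep re-optimizing.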
+      MachineBasicBlock *NewTBB, *NewFBB;
+      SmallVector<MachineOperand, 4> Cond;
+      MachineInstr *Term = MBB.getFirstTerminator();
+      if (Term != MBB.end() && isPredicated(Term) &&
+          !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
+        MachineBasicBlock *NextBB =
+          std::next(MachineFunction::iterator(&MBB));
+        if (NewTBB == NextBB) {
+          ReverseBranchCondition(Cond);
+          RemoveBranch(MBB);
+          return InsertBranch(MBB, TBB, nullptr, Cond, DL);
        }
-      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
-    } else {
-      BuildMI(&MBB, DL,
-              get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
      }
-    return 1;
+      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+    } else if (isEndLoopN(Cond[0].getImm())) {
+      int EndLoopOp = Cond[0].getImm();
+      assert(Cond[1].isMBB());
+      // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+      // Check for it, and change the BB target if needed.
+      SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+      MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+      assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+      Loop->getOperand(0).setMBB(TBB);
+      // Add the ENDLOOP after finding the LOOP0.
+      BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+    } else if (isNewValueJump(Cond[0].getImm())) {
+      assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
+      // New value jump
+      // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
+      // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
+      unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
+      DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
+      if (Cond[2].isReg()) {
+        unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
+        BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+          addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
+      } else if (Cond[2].isImm()) {
+        BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+          addImm(Cond[2].getImm()).addMBB(TBB);
+      } else
+        llvm_unreachable("Invalid condition for branching");
+    } else {
+      assert((Cond.size() == 2) && "Malformed cond vector");
+      const MachineOperand &RO = Cond[1];
+      unsigned Flags = getUndefRegState(RO.isUndef());
+      BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
    }
+    return 1;
+  }
+  assert((!Cond.empty()) &&
+         "Cond. cannot be empty when multiple branchings are required");
+  assert((!isNewValueJump(Cond[0].getImm())) &&
+         "NV-jump cannot be inserted with another branch");
+  // Special case for hardware loops. The condition is a basic block.
+  if (isEndLoopN(Cond[0].getImm())) {
+    int EndLoopOp = Cond[0].getImm();
+    assert(Cond[1].isMBB());
+    // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+    // Check for it, and change the BB target if needed.
+    SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+    MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+    assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+    Loop->getOperand(0).setMBB(TBB);
+    // Add the ENDLOOP after finding the LOOP0.
+    BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+  } else {
+    const MachineOperand &RO = Cond[1];
+    unsigned Flags = getUndefRegState(RO.isUndef());
+    BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+  }
+  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
 
-  BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
-  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
-
-  return 2;
+  return 2;
 }
 
+/// This function can analyze one- or two-way branching only and should
+/// (mostly) be called from the target-independent side.
+/// First entry is always the opcode of the branching instruction, except when
+/// the Cond vector is supposed to be empty, e.g., when AnalyzeBranch fails or
+/// the BB contains only an unconditional jump. Subsequent entries depend upon
+/// the opcode, e.g. Jump_c p will have
+/// Cond[0] = Jump_c
+/// Cond[1] = p
+/// HW-loop ENDLOOP:
+/// Cond[0] = ENDLOOP
+/// Cond[1] = MBB
+/// New value jump:
+/// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode
+/// Cond[1] = R
+/// Cond[2] = Imm
+/// @note The related function \fn findInstrPredicate fills in the Cond
+/// vector when a predicated instruction is passed to it; we follow the same
+/// protocol in that case too.
+///
 bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                      MachineBasicBlock *&TBB,
-                                     MachineBasicBlock *&FBB,
-                                     SmallVectorImpl<MachineOperand> &Cond,
-                                     bool AllowModify) const {
+                                     MachineBasicBlock *&FBB,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     bool AllowModify) const {
   TBB = nullptr;
   FBB = nullptr;
+  Cond.clear();
 
   // If the block has no terminators, it just falls into the block after it.
   MachineBasicBlock::instr_iterator I = MBB.instr_end();
@@ -200,6 +302,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   do {
     --I;
     if (I->isEHLabel())
+      // Don't analyze EH branches.
       return true;
   } while (I != MBB.instr_begin());
@@ -211,9 +314,11 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       return false;
     --I;
   }
-
-  // Delete the JMP if it's equivalent to a fall-through.
-  if (AllowModify && I->getOpcode() == Hexagon::J2_jump &&
+
+  bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump &&
+                     I->getOperand(0).isMBB();
+  // Delete the J2_jump if it's equivalent to a fall-through.
+  if (AllowModify && JumpToBlock &&
       MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
     DEBUG(dbgs()<< "\nErasing the jump to successor block\n";);
     I->eraseFromParent();
@@ -243,9 +348,17 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   } while(I);
 
   int LastOpcode = LastInst->getOpcode();
+  int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0;
+  // If the branch target is not a basic block, it could be a tail call.
+  // (It is, if the target is a function.)
+  if (LastOpcode == Hexagon::J2_jump && !LastInst->getOperand(0).isMBB())
+    return true;
+  if (SecLastOpcode == Hexagon::J2_jump &&
+      !SecondLastInst->getOperand(0).isMBB())
+    return true;
 
   bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode);
-  bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode);
+  bool LastOpcodeHasNVJump = isNewValueJump(LastInst);
 
   // If there is only one terminator instruction, process it.
   if (LastInst && !SecondLastInst) {
@@ -253,32 +366,50 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       TBB = LastInst->getOperand(0).getMBB();
       return false;
     }
-    if (LastOpcode == Hexagon::ENDLOOP0) {
+    if (isEndLoopN(LastOpcode)) {
       TBB = LastInst->getOperand(0).getMBB();
+      Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
       Cond.push_back(LastInst->getOperand(0));
       return false;
     }
     if (LastOpcodeHasJMP_c) {
       TBB = LastInst->getOperand(1).getMBB();
-      if (LastOpcodeHasNot) {
-        Cond.push_back(MachineOperand::CreateImm(0));
-      }
+      Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
       Cond.push_back(LastInst->getOperand(0));
       return false;
     }
+    // Only supporting rr/ri versions of new-value jumps.
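// For illustration, an rr-form new-value compare-and-jump looks like
//   { r2 = add(r3, #1)
//     if (cmp.eq(r2.new, r4)) jump:t .LBB0_3 }   // a sketch
// so the branch carries three explicit operands (Rs, Rt or #u5, and the
// target), which is what the operand-count checks below rely on.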
+    if (LastOpcodeHasNVJump && (LastInst->getNumExplicitOperands() == 3)) {
+      TBB = LastInst->getOperand(2).getMBB();
+      Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+      Cond.push_back(LastInst->getOperand(0));
+      Cond.push_back(LastInst->getOperand(1));
+      return false;
+    }
+    DEBUG(dbgs() << "\nCan't analyze BB#" << MBB.getNumber()
+                 << " with one jump\n";);
     // Otherwise, don't know what this is.
     return true;
   }
 
-  int SecLastOpcode = SecondLastInst->getOpcode();
-
   bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
-  bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode);
+  bool SecLastOpcodeHasNVJump = isNewValueJump(SecondLastInst);
   if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) {
     TBB = SecondLastInst->getOperand(1).getMBB();
-    if (SecLastOpcodeHasNot)
-      Cond.push_back(MachineOperand::CreateImm(0));
+    Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+    Cond.push_back(SecondLastInst->getOperand(0));
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+  // Only supporting rr/ri versions of new-value jumps.
+  if (SecLastOpcodeHasNVJump &&
+      (SecondLastInst->getNumExplicitOperands() == 3) &&
+      (LastOpcode == Hexagon::J2_jump)) {
+    TBB = SecondLastInst->getOperand(2).getMBB();
+    Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
     Cond.push_back(SecondLastInst->getOperand(0));
+    Cond.push_back(SecondLastInst->getOperand(1));
     FBB = LastInst->getOperand(0).getMBB();
     return false;
   }
@@ -293,48 +424,40 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
     return false;
   }
 
-  // If the block ends with an ENDLOOP, and JMP, handle it.
-  if (SecLastOpcode == Hexagon::ENDLOOP0 &&
-      LastOpcode == Hexagon::J2_jump) {
+  // If the block ends with an ENDLOOP and a J2_jump, handle it.
+  if (isEndLoopN(SecLastOpcode) && LastOpcode == Hexagon::J2_jump) {
     TBB = SecondLastInst->getOperand(0).getMBB();
+    Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
     Cond.push_back(SecondLastInst->getOperand(0));
     FBB = LastInst->getOperand(0).getMBB();
     return false;
   }
-
+  DEBUG(dbgs() << "\nCan't analyze BB#" << MBB.getNumber()
+               << " with two jumps";);
   // Otherwise, can't handle this.
   return true;
 }
 
-
 unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
-  int BOpc = Hexagon::J2_jump;
-  int BccOpc = Hexagon::J2_jumpt;
-  int BccOpcNot = Hexagon::J2_jumpf;
-
+  DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber());
   MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin()) return 0;
-  --I;
-  if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc &&
-      I->getOpcode() != BccOpcNot)
-    return 0;
-
-  // Remove the branch.
-  I->eraseFromParent();
-
-  I = MBB.end();
-
-  if (I == MBB.begin()) return 1;
-  --I;
-  if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot)
-    return 1;
-
-  // Remove the branch.
-  I->eraseFromParent();
-  return 2;
+  unsigned Count = 0;
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+    // Only remove branches from the end of the MBB.
+    if (!I->isBranch())
+      return Count;
+    if (Count && (I->getOpcode() == Hexagon::J2_jump))
+      llvm_unreachable("Malformed basic block: unconditional branch not last");
+    MBB.erase(&MBB.back());
+    I = MBB.end();
+    ++Count;
+  }
+  return Count;
 }
 
-
 /// \brief For a comparison instruction, return the source registers in
 /// \p SrcReg and \p SrcReg2 if it has two register operands, and the value it
 /// compares against in CmpValue.
Return true if the comparison instruction @@ -346,33 +469,39 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, // Set mask and the first source register. switch (Opc) { - case Hexagon::C2_cmpeqp: - case Hexagon::C2_cmpeqi: case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgt: case Hexagon::C2_cmpgtp: - case Hexagon::C2_cmpgtup: - case Hexagon::C2_cmpgtui: case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtup: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + case Hexagon::C2_cmpeqi: case Hexagon::C2_cmpgti: - case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtui: + case Hexagon::C4_cmpneqi: + case Hexagon::C4_cmplteui: + case Hexagon::C4_cmpltei: SrcReg = MI->getOperand(1).getReg(); Mask = ~0; break; - case Hexagon::CMPbEQri_V4: - case Hexagon::CMPbEQrr_sbsb_V4: - case Hexagon::CMPbEQrr_ubub_V4: - case Hexagon::CMPbGTUri_V4: - case Hexagon::CMPbGTUrr_V4: - case Hexagon::CMPbGTrr_V4: + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgti: + case Hexagon::A4_cmpbgtui: SrcReg = MI->getOperand(1).getReg(); Mask = 0xFF; break; - case Hexagon::CMPhEQri_V4: - case Hexagon::CMPhEQrr_shl_V4: - case Hexagon::CMPhEQrr_xor_V4: - case Hexagon::CMPhGTUri_V4: - case Hexagon::CMPhGTUrr_V4: - case Hexagon::CMPhGTrr_shl_V4: + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmphgt: + case Hexagon::A4_cmphgtu: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgti: + case Hexagon::A4_cmphgtui: SrcReg = MI->getOperand(1).getReg(); Mask = 0xFFFF; break; @@ -380,30 +509,36 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, // Set the value/second source register. switch (Opc) { - case Hexagon::C2_cmpeqp: case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgt: case Hexagon::C2_cmpgtp: - case Hexagon::C2_cmpgtup: case Hexagon::C2_cmpgtu: - case Hexagon::C2_cmpgt: - case Hexagon::CMPbEQrr_sbsb_V4: - case Hexagon::CMPbEQrr_ubub_V4: - case Hexagon::CMPbGTUrr_V4: - case Hexagon::CMPbGTrr_V4: - case Hexagon::CMPhEQrr_shl_V4: - case Hexagon::CMPhEQrr_xor_V4: - case Hexagon::CMPhGTUrr_V4: - case Hexagon::CMPhGTrr_shl_V4: + case Hexagon::C2_cmpgtup: + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmphgt: + case Hexagon::A4_cmphgtu: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: SrcReg2 = MI->getOperand(2).getReg(); return true; case Hexagon::C2_cmpeqi: case Hexagon::C2_cmpgtui: case Hexagon::C2_cmpgti: - case Hexagon::CMPbEQri_V4: - case Hexagon::CMPbGTUri_V4: - case Hexagon::CMPhEQri_V4: - case Hexagon::CMPhGTUri_V4: + case Hexagon::C4_cmpneqi: + case Hexagon::C4_cmplteui: + case Hexagon::C4_cmpltei: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgti: + case Hexagon::A4_cmpbgtui: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgti: + case Hexagon::A4_cmphgtui: SrcReg2 = 0; Value = MI->getOperand(2).getImm(); return true; @@ -553,12 +688,101 @@ void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl<MachineInstr*> &NewMIs) const { llvm_unreachable("Unimplemented"); } +bool +HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + const HexagonRegisterInfo &TRI = getRegisterInfo(); + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Opc = MI->getOpcode(); + 
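// For example (a sketch of what the VMULW case below emits): the pseudo
//   r1:0 = VMULW r3:2, r5:4
// becomes two 32-bit multiplies on the register-pair halves:
//   r1 = mpyi(r3, r5)    // high words
//   r0 = mpyi(r2, r4)    // low words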
+ switch (Opc) { + case Hexagon::ALIGNA: + BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg()) + .addReg(TRI.getFrameRegister()) + .addImm(-MI->getOperand(1).getImm()); + MBB.erase(MI); + return true; + case Hexagon::TFR_PdTrue: { + unsigned Reg = MI->getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MBB.erase(MI); + return true; + } + case Hexagon::TFR_PdFalse: { + unsigned Reg = MI->getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MBB.erase(MI); + return true; + } + case Hexagon::VMULW: { + // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies. + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Src1Reg = MI->getOperand(1).getReg(); + unsigned Src2Reg = MI->getOperand(2).getReg(); + unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); + unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); + unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); + unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), + TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + .addReg(Src2SubHi); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), + TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + .addReg(Src2SubLo); + MBB.erase(MI); + MRI.clearKillFlags(Src1SubHi); + MRI.clearKillFlags(Src1SubLo); + MRI.clearKillFlags(Src2SubHi); + MRI.clearKillFlags(Src2SubLo); + return true; + } + case Hexagon::VMULW_ACC: { + // Expand 64-bit vector multiply with addition into 2 scalar multiplies. + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Src1Reg = MI->getOperand(1).getReg(); + unsigned Src2Reg = MI->getOperand(2).getReg(); + unsigned Src3Reg = MI->getOperand(3).getReg(); + unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); + unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); + unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); + unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg); + unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), + TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + .addReg(Src2SubHi).addReg(Src3SubHi); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), + TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + .addReg(Src2SubLo).addReg(Src3SubLo); + MBB.erase(MI); + MRI.clearKillFlags(Src1SubHi); + MRI.clearKillFlags(Src1SubLo); + MRI.clearKillFlags(Src2SubHi); + MRI.clearKillFlags(Src2SubLo); + MRI.clearKillFlags(Src3SubHi); + MRI.clearKillFlags(Src3SubLo); + return true; + } + case Hexagon::TCRETURNi: + MI->setDesc(get(Hexagon::J2_jump)); + return true; + case Hexagon::TCRETURNr: + MI->setDesc(get(Hexagon::J2_jumpr)); + return true; + } + return false; +} MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FI) const { + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FI) const { // Hexagon_TODO: Implement. 
return nullptr; } @@ -582,10 +806,6 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { } bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const { - // Constant extenders are allowed only for V4 and above. - if (!Subtarget.hasV4TOps()) - return false; - const MCInstrDesc &MID = MI->getDesc(); const uint64_t F = MID.TSFlags; if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask) @@ -635,6 +855,16 @@ bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const { return false; } +bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + +bool HexagonInstrInfo::isNewValue(Opcode_t Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4; } @@ -649,7 +879,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { switch(Opc) { case Hexagon::A2_tfrsi: - return isInt<12>(MI->getOperand(1).getImm()); + return (isOperandExtended(MI, 1) && isConstExtended(MI)) || isInt<12>(MI->getOperand(1).getImm()); case Hexagon::S2_storerd_io: return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); @@ -700,7 +930,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { return (isUInt<6>(MI->getOperand(1).getImm()) && isInt<6>(MI->getOperand(2).getImm())); - case Hexagon::ADD_ri: + case Hexagon::A2_addi: return isInt<8>(MI->getOperand(2).getImm()); case Hexagon::A2_aslh: @@ -709,7 +939,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { case Hexagon::A2_sxth: case Hexagon::A2_zxtb: case Hexagon::A2_zxth: - return Subtarget.hasV4TOps(); + return true; } return true; @@ -755,8 +985,7 @@ bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask); } -int HexagonInstrInfo:: -getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { +int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { enum Hexagon::PredSense inPredSense; inPredSense = invertPredicate ? Hexagon::PredSense_false : Hexagon::PredSense_true; @@ -774,14 +1003,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { return !invertPredicate ? Hexagon::C2_ccombinewt : Hexagon::C2_ccombinewf; - // Word. - case Hexagon::STriw_f: - return !invertPredicate ? Hexagon::S2_pstorerit_io: - Hexagon::S2_pstorerif_io; - case Hexagon::STriw_indexed_f: - return !invertPredicate ? Hexagon::S2_pstorerit_io: - Hexagon::S2_pstorerif_io; - // DEALLOC_RETURN. case Hexagon::L4_return: return !invertPredicate ? Hexagon::L4_return_t: @@ -794,148 +1015,51 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { bool HexagonInstrInfo:: PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Cond) const { + if (Cond.empty() || isEndLoopN(Cond[0].getImm())) { + DEBUG(dbgs() << "\nCannot predicate:"; MI->dump();); + return false; + } int Opc = MI->getOpcode(); assert (isPredicable(MI) && "Expected predicable instruction"); - bool invertJump = (!Cond.empty() && Cond[0].isImm() && - (Cond[0].getImm() == 0)); - - // This will change MI's opcode to its predicate version. - // However, its operand list is still the old one, i.e. the - // non-predicate one. 
- MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump))); - - int oper = -1; - unsigned int GAIdx = 0; - - // Indicates whether the current MI has a GlobalAddress operand - bool hasGAOpnd = false; - std::vector<MachineOperand> tmpOpnds; - - // Indicates whether we need to shift operands to right. - bool needShift = true; - - // The predicate is ALWAYS the FIRST input operand !!! - if (MI->getNumOperands() == 0) { - // The non-predicate version of MI does not take any operands, - // i.e. no outs and no ins. In this condition, the predicate - // operand will be directly placed at Operands[0]. No operand - // shift is needed. - // Example: BARRIER - needShift = false; - oper = -1; - } - else if ( MI->getOperand(MI->getNumOperands()-1).isReg() - && MI->getOperand(MI->getNumOperands()-1).isDef() - && !MI->getOperand(MI->getNumOperands()-1).isImplicit()) { - // The non-predicate version of MI does not have any input operands. - // In this condition, we extend the length of Operands[] by one and - // copy the original last operand to the newly allocated slot. - // At this moment, it is just a place holder. Later, we will put - // predicate operand directly into it. No operand shift is needed. - // Example: r0=BARRIER (this is a faked insn used here for illustration) - MI->addOperand(MI->getOperand(MI->getNumOperands()-1)); - needShift = false; - oper = MI->getNumOperands() - 2; - } - else { - // We need to right shift all input operands by one. Duplicate the - // last operand into the newly allocated slot. - MI->addOperand(MI->getOperand(MI->getNumOperands()-1)); - } - - if (needShift) - { - // Operands[ MI->getNumOperands() - 2 ] has been copied into - // Operands[ MI->getNumOperands() - 1 ], so we start from - // Operands[ MI->getNumOperands() - 3 ]. - // oper is a signed int. - // It is ok if "MI->getNumOperands()-3" is -3, -2, or -1. - for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) - { - MachineOperand &MO = MI->getOperand(oper); - - // Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4] Opnd[5] Opnd[6] Opnd[7] - // <Def0> <Def1> <Use0> <Use1> <ImpDef0> <ImpDef1> <ImpUse0> <ImpUse1> - // /\~ - // /||\~ - // || - // Predicate Operand here - if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) { - break; - } - if (MO.isReg()) { - MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(), - MO.isImplicit(), MO.isKill(), - MO.isDead(), MO.isUndef(), - MO.isDebug()); - } - else if (MO.isImm()) { - MI->getOperand(oper+1).ChangeToImmediate(MO.getImm()); - } - else if (MO.isGlobal()) { - // MI can not have more than one GlobalAddress operand. - assert(hasGAOpnd == false && "MI can only have one GlobalAddress opnd"); - - // There is no member function called "ChangeToGlobalAddress" in the - // MachineOperand class (not like "ChangeToRegister" and - // "ChangeToImmediate"). So we have to remove them from Operands[] list - // first, and then add them back after we have inserted the predicate - // operand. tmpOpnds[] is to remember these operands before we remove - // them. - tmpOpnds.push_back(MO); - - // Operands[oper] is a GlobalAddress operand; - // Operands[oper+1] has been copied into Operands[oper+2]; - hasGAOpnd = true; - GAIdx = oper; - continue; - } - else { - llvm_unreachable("Unexpected operand type"); - } - } + bool invertJump = predOpcodeHasNot(Cond); + + // We have to predicate MI "in place", i.e. after this function returns, + // MI will need to be transformed into a predicated form. 
To avoid com- + // plicated manipulations with the operands (handling tied operands, + // etc.), build a new temporary instruction, then overwrite MI with it. + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned PredOpc = getCondOpcode(Opc, invertJump); + MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc)); + unsigned NOp = 0, NumOps = MI->getNumOperands(); + while (NOp < NumOps) { + MachineOperand &Op = MI->getOperand(NOp); + if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) + break; + T.addOperand(Op); + NOp++; } - int regPos = invertJump ? 1 : 0; - MachineOperand PredMO = Cond[regPos]; + unsigned PredReg, PredRegPos, PredRegFlags; + bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags); + (void)GotPredReg; + assert(GotPredReg); + T.addReg(PredReg, PredRegFlags); + while (NOp < NumOps) + T.addOperand(MI->getOperand(NOp++)); - // [oper] now points to the last explicit Def. Predicate operand must be - // located at [oper+1]. See diagram above. - // This assumes that the predicate is always the first operand, - // i.e. Operands[0+numResults], in the set of inputs - // It is better to have an assert here to check this. But I don't know how - // to write this assert because findFirstPredOperandIdx() would return -1 - if (oper < -1) oper = -1; + MI->setDesc(get(PredOpc)); + while (unsigned n = MI->getNumOperands()) + MI->RemoveOperand(n-1); + for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i) + MI->addOperand(T->getOperand(i)); - MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(), - PredMO.isImplicit(), false, - PredMO.isDead(), PredMO.isUndef(), - PredMO.isDebug()); + MachineBasicBlock::instr_iterator TI = &*T; + B.erase(TI); - MachineRegisterInfo &RegInfo = MI->getParent()->getParent()->getRegInfo(); - RegInfo.clearKillFlags(PredMO.getReg()); - - if (hasGAOpnd) - { - unsigned int i; - - // Operands[GAIdx] is the original GlobalAddress operand, which is - // already copied into tmpOpnds[0]. - // Operands[GAIdx] now stores a copy of Operands[GAIdx-1] - // Operands[GAIdx+1] has already been copied into Operands[GAIdx+2], - // so we start from [GAIdx+2] - for (i = GAIdx + 2; i < MI->getNumOperands(); ++i) - tmpOpnds.push_back(MI->getOperand(i)); - - // Remove all operands in range [ (GAIdx+1) ... (MI->getNumOperands()-1) ] - // It is very important that we always remove from the end of Operands[] - // MI->getNumOperands() is at least 2 if program goes to here. - for (i = MI->getNumOperands() - 1; i > GAIdx; --i) - MI->RemoveOperand(i); - - for (i = 0; i < tmpOpnds.size(); ++i) - MI->addOperand(tmpOpnds[i]); - } + MachineRegisterInfo &MRI = B.getParent()->getRegInfo(); + MRI.clearKillFlags(PredReg); return true; } @@ -1014,12 +1138,10 @@ bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { // Returns true, if a ST insn can be promoted to a new-value store. bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const { - const HexagonRegisterInfo& QRI = getRegisterInfo(); const uint64_t F = MI->getDesc().TSFlags; return ((F >> HexagonII::mayNVStorePos) & - HexagonII::mayNVStoreMask & - QRI.Subtarget.hasV4TOps()); + HexagonII::mayNVStoreMask); } bool @@ -1050,15 +1172,20 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, // // We indicate that we want to reverse the branch by -// inserting a 0 at the beginning of the Cond vector. +// inserting the reversed branching opcode. 
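// For example (a sketch): Cond = {J2_jumpt, p0} becomes {J2_jumpf, p0}.
// ENDLOOP conditions have no inverted form, so they report failure (true).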
 //
-bool HexagonInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
-  if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
-    Cond.erase(Cond.begin());
-  } else {
-    Cond.insert(Cond.begin(), MachineOperand::CreateImm(0));
-  }
+bool HexagonInstrInfo::ReverseBranchCondition(
+      SmallVectorImpl<MachineOperand> &Cond) const {
+  if (Cond.empty())
+    return true;
+  assert(Cond[0].isImm() && "First entry in the cond vector not imm-val");
+  Opcode_t opcode = Cond[0].getImm();
+  assert(get(opcode).isBranch() && "Should be a branching condition.");
+  if (isEndLoopN(opcode))
+    return true;
+  Opcode_t NewOpcode = getInvertedPredicatedOpcode(opcode);
+  Cond[0].setImm(NewOpcode);
   return false;
 }
@@ -1084,10 +1211,10 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
 }
 
-bool HexagonInstrInfo::
-isValidOffset(const int Opcode, const int Offset) const {
+bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
+      bool Extend) const {
   // This function checks whether the "Offset" is in the correct range of
-  // the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is
+  // the given "Opcode". If "Offset" is not in the correct range, "A2_addi" is
   // inserted to calculate the final address. Due to this reason, the function
   // assumes that the "Offset" has correct alignment.
   // We used to assert if the offset was not properly aligned, however,
@@ -1095,19 +1222,23 @@ isValidOffset(const int Opcode, const int Offset) const {
   // problem, and we need to allow for it. The front end warns of such
   // misaligns with respect to load size.
 
-  switch(Opcode) {
+  switch (Opcode) {
+  case Hexagon::J2_loop0i:
+  case Hexagon::J2_loop1i:
+    return isUInt<10>(Offset);
+  }
+
+  if (Extend)
+    return true;
 
+  switch (Opcode) {
   case Hexagon::L2_loadri_io:
-  case Hexagon::LDriw_f:
   case Hexagon::S2_storeri_io:
-  case Hexagon::STriw_f:
     return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
       (Offset <= Hexagon_MEMW_OFFSET_MAX);
 
   case Hexagon::L2_loadrd_io:
-  case Hexagon::LDrid_f:
   case Hexagon::S2_storerd_io:
-  case Hexagon::STrid_f:
     return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
       (Offset <= Hexagon_MEMD_OFFSET_MAX);
@@ -1123,8 +1254,7 @@ isValidOffset(const int Opcode, const int Offset) const {
     return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
       (Offset <= Hexagon_MEMB_OFFSET_MAX);
 
-  case Hexagon::ADD_ri:
-  case Hexagon::TFR_FI:
+  case Hexagon::A2_addi:
    return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
      (Offset <= Hexagon_ADDI_OFFSET_MAX);
@@ -1158,10 +1288,8 @@ isValidOffset(const int Opcode, const int Offset) const {
  case Hexagon::LDriw_pred:
    return true;
 
-  case Hexagon::J2_loop0i:
-    return isUInt<10>(Offset);
-
-  // INLINEASM is very special.
+ case Hexagon::TFR_FI: + case Hexagon::TFR_FIA: case Hexagon::INLINEASM: return true; } @@ -1324,8 +1452,8 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { case Hexagon::A4_pzxthfnew: case Hexagon::A4_pzxtht: case Hexagon::A4_pzxthtnew: - case Hexagon::ADD_ri_cPt: - case Hexagon::ADD_ri_cNotPt: + case Hexagon::A2_paddit: + case Hexagon::A2_paddif: case Hexagon::C2_ccombinewt: case Hexagon::C2_ccombinewf: return true; @@ -1334,7 +1462,6 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { bool HexagonInstrInfo:: isConditionalLoad (const MachineInstr* MI) const { - const HexagonRegisterInfo& QRI = getRegisterInfo(); switch (MI->getOpcode()) { default: return false; @@ -1350,7 +1477,6 @@ isConditionalLoad (const MachineInstr* MI) const { case Hexagon::L2_ploadruhf_io: case Hexagon::L2_ploadrubt_io: case Hexagon::L2_ploadrubf_io: - return true; case Hexagon::L2_ploadrdt_pi: case Hexagon::L2_ploadrdf_pi: case Hexagon::L2_ploadrit_pi: @@ -1363,7 +1489,6 @@ isConditionalLoad (const MachineInstr* MI) const { case Hexagon::L2_ploadruhf_pi: case Hexagon::L2_ploadrubt_pi: case Hexagon::L2_ploadrubf_pi: - return QRI.Subtarget.hasV4TOps(); case Hexagon::L4_ploadrdt_rr: case Hexagon::L4_ploadrdf_rr: case Hexagon::L4_ploadrbt_rr: @@ -1376,7 +1501,7 @@ isConditionalLoad (const MachineInstr* MI) const { case Hexagon::L4_ploadruhf_rr: case Hexagon::L4_ploadrit_rr: case Hexagon::L4_ploadrif_rr: - return QRI.Subtarget.hasV4TOps(); + return true; } } @@ -1416,7 +1541,6 @@ isConditionalLoad (const MachineInstr* MI) const { // is not valid for new-value stores. bool HexagonInstrInfo:: isConditionalStore (const MachineInstr* MI) const { - const HexagonRegisterInfo& QRI = getRegisterInfo(); switch (MI->getOpcode()) { default: return false; @@ -1450,7 +1574,6 @@ isConditionalStore (const MachineInstr* MI) const { case Hexagon::S4_pstorerif_rr: case Hexagon::S2_pstorerit_pi: case Hexagon::S2_pstorerif_pi: - return QRI.Subtarget.hasV4TOps(); // V4 global address store before promoting to dot new. case Hexagon::S4_pstorerdt_abs: @@ -1461,7 +1584,7 @@ isConditionalStore (const MachineInstr* MI) const { case Hexagon::S4_pstorerhf_abs: case Hexagon::S4_pstorerit_abs: case Hexagon::S4_pstorerif_abs: - return QRI.Subtarget.hasV4TOps(); + return true; // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded // from the "Conditional Store" list. 
Because a predicated new value store @@ -1500,13 +1623,12 @@ bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { return false; } -bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const { - return (getAddrMode(MI) == HexagonII::PostInc); +bool HexagonInstrInfo::isNewValueJump(Opcode_t Opcode) const { + return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); } -bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; - return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const { + return (getAddrMode(MI) == HexagonII::PostInc); } // Returns true, if any one of the operands is a dot new @@ -1548,22 +1670,29 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { switch (MI->getOpcode()) { default: llvm_unreachable("Unknown .new type"); - // store new value byte - case Hexagon::STrib_shl_V4: - return Hexagon::STrib_shl_nv_V4; + case Hexagon::S4_storerb_ur: + return Hexagon::S4_storerbnew_ur; - case Hexagon::STrih_shl_V4: - return Hexagon::STrih_shl_nv_V4; + case Hexagon::S4_storerh_ur: + return Hexagon::S4_storerhnew_ur; - case Hexagon::STriw_f: - return Hexagon::S2_storerinew_io; + case Hexagon::S4_storeri_ur: + return Hexagon::S4_storerinew_ur; - case Hexagon::STriw_indexed_f: - return Hexagon::S4_storerinew_rr; + case Hexagon::S2_storerb_pci: + return Hexagon::S2_storerb_pci; - case Hexagon::STriw_shl_V4: - return Hexagon::STriw_shl_nv_V4; + case Hexagon::S2_storeri_pci: + return Hexagon::S2_storeri_pci; + case Hexagon::S2_storerh_pci: + return Hexagon::S2_storerh_pci; + + case Hexagon::S2_storerd_pci: + return Hexagon::S2_storerd_pci; + + case Hexagon::S2_storerf_pci: + return Hexagon::S2_storerf_pci; } return 0; } @@ -1652,19 +1781,14 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI, return false; } -bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const { - - // Constant extenders are allowed only for V4 and above. - if (!Subtarget.hasV4TOps()) - return false; - +bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; if (isExtended) // Instruction must be extended. return true; - unsigned isExtendable = (F >> HexagonII::ExtendablePos) - & HexagonII::ExtendableMask; + unsigned isExtendable = + (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; if (!isExtendable) return false; @@ -1685,7 +1809,8 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const { // We currently only handle isGlobal() because it is the only kind of // object we are going to end up with here for now. // In the future we probably should add isSymbol(), etc. - if (MO.isGlobal() || MO.isSymbol()) + if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() || + MO.isJTI() || MO.isCPI()) return true; // If the extendable operand is not 'Immediate' type, the instruction should @@ -1699,6 +1824,27 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const { return (ImmValue < MinValue || ImmValue > MaxValue); } +// Return the number of bytes required to encode the instruction. +// Hexagon instructions are fixed length, 4 bytes, unless they +// use a constant extender, which requires another 4 bytes. +// For debug instructions and prolog labels, return 0. 
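// For example (a sketch): "r1 = add(r2, #100)" fits the instruction's native
// immediate range and takes 4 bytes, while "r1 = add(r2, ##12345678)" needs
// an immediate-extender word and takes 8.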
+unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const { + + if (MI->isDebugValue() || MI->isPosition()) + return 0; + + unsigned Size = MI->getDesc().getSize(); + if (!Size) + // Assume the default insn size in case it cannot be determined + // for whatever reason. + Size = HEXAGON_INSTR_SIZE; + + if (isConstExtended(MI) || isExtended(MI)) + Size += HEXAGON_INSTR_SIZE; + + return Size; +} + // Returns the opcode to use when converting MI, which is a conditional jump, // into a conditional instruction which uses the .new value of the predicate. // We also use branch probabilities to add a hint to the jump. @@ -1730,10 +1876,6 @@ HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI, // Returns true if a particular operand is extendable for an instruction. bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, unsigned short OperandNum) const { - // Constant extenders are allowed only for V4 and above. - if (!Subtarget.hasV4TOps()) - return false; - const uint64_t F = MI->getDesc().TSFlags; return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) @@ -1841,8 +1983,36 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const { (Opcode == Hexagon::J2_jumpf); } -bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const { - return (Opcode == Hexagon::J2_jumpf) || - (Opcode == Hexagon::J2_jumpfnewpt) || - (Opcode == Hexagon::J2_jumpfnew); +bool HexagonInstrInfo::predOpcodeHasNot( + const SmallVectorImpl<MachineOperand> &Cond) const { + if (Cond.empty() || !isPredicated(Cond[0].getImm())) + return false; + return !isPredicatedTrue(Cond[0].getImm()); +} + +bool HexagonInstrInfo::isEndLoopN(Opcode_t Opcode) const { + return (Opcode == Hexagon::ENDLOOP0 || + Opcode == Hexagon::ENDLOOP1); } + +bool HexagonInstrInfo::getPredReg(const SmallVectorImpl<MachineOperand> &Cond, + unsigned &PredReg, unsigned &PredRegPos, + unsigned &PredRegFlags) const { + if (Cond.empty()) + return false; + assert(Cond.size() == 2); + if (isNewValueJump(Cond[0].getImm()) || Cond[1].isMBB()) { + DEBUG(dbgs() << "No predregs for new-value jumps/endloop"); + return false; + } + PredReg = Cond[1].getReg(); + PredRegPos = 1; + // See IfConversion.cpp why we add RegState::Implicit | RegState::Undef + PredRegFlags = 0; + if (Cond[1].isImplicit()) + PredRegFlags = RegState::Implicit; + if (Cond[1].isUndef()) + PredRegFlags |= RegState::Undef; + return true; +} + diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 6acfbec24709..0239cabe9e52 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -1,3 +1,4 @@ + //===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure @@ -26,14 +27,15 @@ namespace llvm { struct EVT; - +class HexagonSubtarget; class HexagonInstrInfo : public HexagonGenInstrInfo { virtual void anchor(); const HexagonRegisterInfo RI; const HexagonSubtarget &Subtarget; - typedef unsigned Opcode_t; public: + typedef unsigned Opcode_t; + explicit HexagonInstrInfo(HexagonSubtarget &ST); /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As @@ -102,15 +104,21 @@ public: const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, + /// expandPostRAPseudo - This function is called for all pseudo instructions + /// that remain after register allocation. 
Many pseudo instructions are + /// created to help register allocation. This is the place to convert them + /// into real instructions. The target can edit MI in place, or it can insert + /// new instructions and erase MI. The function should return true if + /// anything was changed. + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; + + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, int FrameIndex) const override; - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const override { + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const override { return nullptr; } @@ -154,7 +162,7 @@ public: bool isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override; - bool isValidOffset(const int Opcode, const int Offset) const; + bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; bool isValidAutoIncImm(const EVT VT, const int Offset) const; bool isMemOp(const MachineInstr *MI) const; bool isSpillPredRegOp(const MachineInstr *MI) const; @@ -178,6 +186,7 @@ public: bool isConditionalStore(const MachineInstr* MI) const; bool isNewValueInst(const MachineInstr* MI) const; bool isNewValue(const MachineInstr* MI) const; + bool isNewValue(Opcode_t Opcode) const; bool isDotNewInst(const MachineInstr* MI) const; int GetDotOldOp(const int opc) const; int GetDotNewOp(const MachineInstr* MI) const; @@ -193,11 +202,13 @@ public: bool isNewValueStore(const MachineInstr* MI) const; bool isNewValueStore(unsigned Opcode) const; bool isNewValueJump(const MachineInstr* MI) const; + bool isNewValueJump(Opcode_t Opcode) const; bool isNewValueJumpCandidate(const MachineInstr *MI) const; void immediateExtend(MachineInstr *MI) const; - bool isConstExtended(MachineInstr *MI) const; + bool isConstExtended(const MachineInstr *MI) const; + unsigned getSize(const MachineInstr *MI) const; int getDotNewPredJumpOp(MachineInstr *MI, const MachineBranchProbabilityInfo *MBPI) const; unsigned getAddrMode(const MachineInstr* MI) const; @@ -209,10 +220,12 @@ public: bool NonExtEquivalentExists (const MachineInstr *MI) const; short getNonExtOpcode(const MachineInstr *MI) const; bool PredOpcodeHasJMP_c(Opcode_t Opcode) const; - bool PredOpcodeHasNot(Opcode_t Opcode) const; - -private: - int getMatchingCondBranchOpcode(int Opc, bool sense) const; + bool predOpcodeHasNot(const SmallVectorImpl<MachineOperand> &Cond) const; + bool isEndLoopN(Opcode_t Opcode) const; + bool getPredReg(const SmallVectorImpl<MachineOperand> &Cond, + unsigned &PredReg, unsigned &PredRegPos, + unsigned &PredRegFlags) const; + int getCondOpcode(int Opc, bool sense) const; }; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 7ce65f345cab..3b32c10ed5b0 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -29,8 +29,36 @@ def F64 : PatLeaf<(f64 DoubleRegs:$R)>; // 64-bit value. def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>; +def HiReg: OutPatFrag<(ops node:$Rs), + (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>; -//===----------------------------------------------------------------------===// +// SDNode for converting immediate C to C-1. 
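// A sketch of the intent: a selection pattern can rewrite "x < #C" as a
// greater-than compare against #C-1 (with the predicate sense inverted),
// using the transform below to produce the C-1 immediate.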
+def DEC_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM1Imm(imm, SDLoc(N)); +}]>; + +// SDNode for converting immediate C to C-2. +def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-2 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM2Imm(imm, SDLoc(N)); +}]>; + +// SDNode for converting immediate C to C-3. +def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-3 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM3Imm(imm, SDLoc(N)); +}]>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + uint32_t imm = N->getZExtValue(); + return XformUToUM1Imm(imm, SDLoc(N)); +}]>; //===----------------------------------------------------------------------===// // Compare @@ -76,10 +104,16 @@ def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>; //===----------------------------------------------------------------------===// // ALU32/ALU + //===----------------------------------------------------------------------===// +// Add. + +def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; +def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; +def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev, @@ -140,12 +174,10 @@ class T_ALU32_combineh<string Op1, string Op2, bits<3> MajOp, bits<3> MinOp, let AsmString = "$Rd = combine($Rs"#Op1#", $Rt"#Op2#")"; } -let isCodeGenOnly = 0 in { def A2_combine_hh : T_ALU32_combineh<".h", ".h", 0b011, 0b100, 1>; def A2_combine_hl : T_ALU32_combineh<".h", ".l", 0b011, 0b101, 1>; def A2_combine_lh : T_ALU32_combineh<".l", ".h", 0b011, 0b110, 1>; def A2_combine_ll : T_ALU32_combineh<".l", ".l", 0b011, 0b111, 1>; -} class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm> @@ -153,12 +185,24 @@ class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp, let AsmString = "$Rd = "#mnemonic#"($Rs, $Rt)"#suffix; } -let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123, - isCodeGenOnly = 0 in { +def A2_svaddh : T_ALU32_3op<"vaddh", 0b110, 0b000, 0, 1>; +def A2_svsubh : T_ALU32_3op<"vsubh", 0b110, 0b100, 1, 0>; + +let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123 in { + def A2_svaddhs : T_ALU32_3op_sfx<"vaddh", ":sat", 0b110, 0b001, 0, 1>; def A2_addsat : T_ALU32_3op_sfx<"add", ":sat", 0b110, 0b010, 0, 1>; + def A2_svadduhs : T_ALU32_3op_sfx<"vadduh", ":sat", 0b110, 0b011, 0, 1>; + def A2_svsubhs : T_ALU32_3op_sfx<"vsubh", ":sat", 0b110, 0b101, 1, 0>; def A2_subsat : T_ALU32_3op_sfx<"sub", ":sat", 0b110, 0b110, 1, 0>; + def A2_svsubuhs : T_ALU32_3op_sfx<"vsubuh", ":sat", 0b110, 0b111, 1, 0>; } +let Itinerary = ALU32_3op_tc_2_SLOT0123 in +def A2_svavghs : T_ALU32_3op_sfx<"vavgh", ":rnd", 0b111, 0b001, 0, 1>; + +def A2_svavgh : T_ALU32_3op<"vavgh", 0b111, 0b000, 0, 1>; +def A2_svnavgh : T_ALU32_3op<"vnavgh", 0b111, 0b011, 1, 0>; + multiclass T_ALU32_3op_p<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev> { def t : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 0>; @@ 
-174,13 +218,11 @@ multiclass T_ALU32_3op_A2<string mnemonic, bits<3> MajOp, bits<3> MinOp, defm A2_p#NAME : T_ALU32_3op_p<mnemonic, MajOp, MinOp, OpsRev>; } -let isCodeGenOnly = 0 in { defm add : T_ALU32_3op_A2<"add", 0b011, 0b000, 0, 1>; defm and : T_ALU32_3op_A2<"and", 0b001, 0b000, 0, 1>; defm or : T_ALU32_3op_A2<"or", 0b001, 0b001, 0, 1>; defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>; defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>; -} // Pats for instruction selection. class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT> @@ -194,8 +236,7 @@ def: BinOp32_pat<sub, A2_sub, i32>; def: BinOp32_pat<xor, A2_xor, i32>; // A few special cases producing register pairs: -let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0, - isCodeGenOnly = 0 in { +let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in { def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>; let isPredicable = 1 in @@ -208,6 +249,9 @@ let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0, def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>; } +def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>; +def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>; + let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm> : ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), @@ -229,7 +273,7 @@ class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm> let Inst{1-0} = Pd; } -let Itinerary = ALU32_3op_tc_2early_SLOT0123, isCodeGenOnly = 0 in { +let Itinerary = ALU32_3op_tc_2early_SLOT0123 in { def C2_cmpeq : T_ALU32_3op_cmp< "cmp.eq", 0b00, 0, 1>; def C2_cmpgt : T_ALU32_3op_cmp< "cmp.gt", 0b10, 0, 0>; def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>; @@ -252,8 +296,7 @@ def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>; def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>; def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>; -let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1, - isCodeGenOnly = 0 in +let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in def C2_mux: ALU32_rr<(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt), "$Rd = mux($Pu, $Rs, $Rt)", [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel { @@ -283,11 +326,11 @@ def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, isExtentSigned = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 1, - AddedComplexity = 75, isCodeGenOnly = 0 in + AddedComplexity = 75 in def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8), "$Rdd = combine(#$s8, #$S8)", [(set (i64 DoubleRegs:$Rdd), - (i64 (HexagonCOMBINE(i32 s8ExtPred:$s8), (i32 s8ImmPred:$S8))))]> { + (i64 (HexagonCOMBINE(i32 s32ImmPred:$s8), (i32 s8ImmPred:$S8))))]> { bits<5> Rdd; bits<8> s8; bits<8> S8; @@ -303,7 +346,7 @@ def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8), //===----------------------------------------------------------------------===// // Template class for predicated ADD of a reg and an Immediate value. 
//===----------------------------------------------------------------------===// -let hasNewValue = 1 in +let hasNewValue = 1, hasSideEffects = 0 in class T_Addri_Pred <bit PredNot, bit PredNew> : ALU32_ri <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8), @@ -329,13 +372,11 @@ class T_Addri_Pred <bit PredNot, bit PredNew> //===----------------------------------------------------------------------===// // A2_addi: Add a signed immediate to a register. //===----------------------------------------------------------------------===// -let hasNewValue = 1 in -class T_Addri <Operand immOp, list<dag> pattern = [] > +let hasNewValue = 1, hasSideEffects = 0 in +class T_Addri <Operand immOp> : ALU32_ri <(outs IntRegs:$Rd), (ins IntRegs:$Rs, immOp:$s16), - "$Rd = add($Rs, #$s16)", pattern, - //[(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), (s16ExtPred:$s16)))], - "", ALU32_ADDI_tc_1_SLOT0123> { + "$Rd = add($Rs, #$s16)", [], "", ALU32_ADDI_tc_1_SLOT0123> { bits<5> Rd; bits<5> Rs; bits<16> s16; @@ -353,31 +394,29 @@ class T_Addri <Operand immOp, list<dag> pattern = [] > //===----------------------------------------------------------------------===// multiclass Addri_Pred<string mnemonic, bit PredNot> { let isPredicatedFalse = PredNot in { - def _c#NAME : T_Addri_Pred<PredNot, 0>; + def NAME : T_Addri_Pred<PredNot, 0>; // Predicate new - def _cdn#NAME : T_Addri_Pred<PredNot, 1>; + def NAME#new : T_Addri_Pred<PredNot, 1>; } } -let isExtendable = 1, InputType = "imm" in +let isExtendable = 1, isExtentSigned = 1, InputType = "imm" in multiclass Addri_base<string mnemonic, SDNode OpNode> { let CextOpcode = mnemonic, BaseOpcode = mnemonic#_ri in { - let opExtendable = 2, isExtentSigned = 1, opExtentBits = 16, - isPredicable = 1 in - def NAME : T_Addri< s16Ext, // Rd=add(Rs,#s16) - [(set (i32 IntRegs:$Rd), - (add IntRegs:$Rs, s16ExtPred:$s16))]>; - - let opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, - hasSideEffects = 0, isPredicated = 1 in { - defm Pt : Addri_Pred<mnemonic, 0>; - defm NotPt : Addri_Pred<mnemonic, 1>; + let opExtendable = 2, opExtentBits = 16, isPredicable = 1 in + def A2_#NAME : T_Addri<s16Ext>; + + let opExtendable = 3, opExtentBits = 8, isPredicated = 1 in { + defm A2_p#NAME#t : Addri_Pred<mnemonic, 0>; + defm A2_p#NAME#f : Addri_Pred<mnemonic, 1>; } } } -let isCodeGenOnly = 0 in -defm ADD_ri : Addri_base<"add", add>, ImmRegRel, PredNewRel; +defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel; + +def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)), + (i32 (A2_addi I32:$Rs, imm:$s16))>; //===----------------------------------------------------------------------===// // Template class used for the following ALU32 instructions. 
@@ -390,7 +429,7 @@ class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp> : ALU32_ri <(outs IntRegs:$Rd), (ins IntRegs:$Rs, s10Ext:$s10), "$Rd = "#mnemonic#"($Rs, #$s10)" , - [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10))]> { + [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10))]> { bits<5> Rd; bits<5> Rs; bits<10> s10; @@ -406,19 +445,15 @@ class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp> let Inst{4-0} = Rd; } -let isCodeGenOnly = 0 in { -def OR_ri : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel; -def AND_ri : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel; -} +def A2_orir : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel; +def A2_andir : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel; // Subtract register from immediate // Rd32=sub(#s10,Rs32) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 10, -CextOpcode = "sub", InputType = "imm", hasNewValue = 1, isCodeGenOnly = 0 in -def SUB_ri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs), - "$Rd = sub(#$s10, $Rs)" , - [(set IntRegs:$Rd, (sub s10ExtPred:$s10, IntRegs:$Rs))] > , - ImmRegRel { +let isExtendable = 1, CextOpcode = "sub", opExtendable = 1, isExtentSigned = 1, + opExtentBits = 10, InputType = "imm", hasNewValue = 1, hasSideEffects = 0 in +def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs), + "$Rd = sub(#$s10, $Rs)", []>, ImmRegRel { bits<5> Rd; bits<10> s10; bits<5> Rs; @@ -433,14 +468,18 @@ def SUB_ri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs), } // Nop. -let hasSideEffects = 0, isCodeGenOnly = 0 in +let hasSideEffects = 0 in def A2_nop: ALU32Inst <(outs), (ins), "nop" > { let IClass = 0b0111; let Inst{27-24} = 0b1111; } + +def: Pat<(sub s32ImmPred:$s10, IntRegs:$Rs), + (A2_subri imm:$s10, IntRegs:$Rs)>; + // Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). -def : Pat<(not (i32 IntRegs:$src1)), - (SUB_ri -1, (i32 IntRegs:$src1))>; +def: Pat<(not (i32 IntRegs:$src1)), + (A2_subri -1, IntRegs:$src1)>; let hasSideEffects = 0, hasNewValue = 1 in class T_tfr16<bit isHi> @@ -459,10 +498,8 @@ class T_tfr16<bit isHi> let Inst{13-0} = u16{13-0}; } -let isCodeGenOnly = 0 in { def A2_tfril: T_tfr16<0>; def A2_tfrih: T_tfr16<1>; -} // Conditional transfer is an alias to conditional "Rd = add(Rs, #0)". 
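// e.g. "if (p0) r2 = r3" is encoded as "if (p0) r2 = add(r3, #0)" (a sketch).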
// Conditional transfer is an alias to conditional "Rd = add(Rs, #0)".
let isPredicated = 1, hasNewValue = 1, opNewValue = 0 in
@@ -575,20 +612,17 @@ class T_TFRI_Pred<bit PredNot, bit PredNew>
    let Inst{4-0} = Rd;
  }

-let isCodeGenOnly = 0 in {
def C2_cmoveit    : T_TFRI_Pred<0, 0>;
def C2_cmoveif    : T_TFRI_Pred<1, 0>;
def C2_cmovenewit : T_TFRI_Pred<0, 1>;
def C2_cmovenewif : T_TFRI_Pred<1, 1>;
-}

let InputType = "imm", isExtendable = 1, isExtentSigned = 1,
    CextOpcode = "TFR", BaseOpcode = "TFRI", hasNewValue = 1, opNewValue = 0,
    isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1,
-   isPredicated = 0, isPredicable = 1, isReMaterializable = 1,
-   isCodeGenOnly = 0 in
+   isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in
def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16",
-   [(set (i32 IntRegs:$Rd), s16ExtPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
+   [(set (i32 IntRegs:$Rd), s32ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
    ImmRegRel, PredRel {
  bits<5> Rd;
  bits<16> s16;
@@ -599,20 +633,26 @@ def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16",
  let Inst{4-0} = Rd;
}

-let isCodeGenOnly = 0 in
defm A2_tfr  : tfr_base<"TFR">, ImmRegRel, PredNewRel;
+let isAsmParserOnly = 1 in
defm A2_tfrp : TFR64_base<"TFR64">, PredNewRel;

// Assembler mapped
-let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+    isAsmParserOnly = 1 in
def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
  "$dst = #$src1",
  [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>;

// TODO: see if this instruction can be deleted..
-let isExtendable = 1, opExtendable = 1, opExtentBits = 6 in
-def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u6Ext:$src1),
+let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
+    isAsmParserOnly = 1 in {
+def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64Imm:$src1),
  "$dst = #$src1">;
+def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst),
+  (ins s8Ext:$src1, s8Imm:$src2),
+  "$dst = combine(##$src1, #$src2)">;
+}

//===----------------------------------------------------------------------===//
// ALU32/ALU -
@@ -642,28 +682,28 @@ class T_MUX1 <bit MajOp, dag ins, string AsmStr>
  let Inst{4-0} = Rd;
}

-let opExtendable = 2, isCodeGenOnly = 0 in
+let opExtendable = 2 in
def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8Ext:$s8, IntRegs:$Rs),
                      "$Rd = mux($Pu, #$s8, $Rs)">;

-let opExtendable = 3, isCodeGenOnly = 0 in
+let opExtendable = 3 in
def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
                      "$Rd = mux($Pu, $Rs, #$s8)">;

-def : Pat<(i32 (select I1:$Pu, s8ExtPred:$s8, I32:$Rs)),
-          (C2_muxri I1:$Pu, s8ExtPred:$s8, I32:$Rs)>;
+def : Pat<(i32 (select I1:$Pu, s32ImmPred:$s8, I32:$Rs)),
+          (C2_muxri I1:$Pu, s32ImmPred:$s8, I32:$Rs)>;

-def : Pat<(i32 (select I1:$Pu, I32:$Rs, s8ExtPred:$s8)),
-          (C2_muxir I1:$Pu, I32:$Rs, s8ExtPred:$s8)>;
+def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32ImmPred:$s8)),
+          (C2_muxir I1:$Pu, I32:$Rs, s32ImmPred:$s8)>;
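The C2_muxri/C2_muxir patterns above lower the generic select node to a single
mux instruction; in C terms the operation is simply (sketch, helper name is
mine):

#include <stdint.h>
/* Rd = mux(Pu, a, b): a when the predicate is true, else b */
static int32_t mux(int pu, int32_t a, int32_t b) {
    return pu ? a : b;
}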
// C2_muxii: Scalar mux immediates.
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1,
-   opExtentBits = 8, opExtendable = 2, isCodeGenOnly = 0 in
+   opExtentBits = 8, opExtendable = 2 in
def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
                         (ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8),
  "$Rd = mux($Pu, #$s8, #$S8)" ,
  [(set (i32 IntRegs:$Rd),
-       (i32 (select I1:$Pu, s8ExtPred:$s8, s8ImmPred:$S8)))] > {
+       (i32 (select I1:$Pu, s32ImmPred:$s8, s8ImmPred:$S8)))] > {
    bits<5> Rd;
    bits<2> Pu;
    bits<8> s8;
@@ -679,14 +719,20 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
    let Inst{4-0} = Rd;
  }

+let isCodeGenOnly = 1, isPseudo = 1 in
+def MUX64_rr : ALU64_rr<(outs DoubleRegs:$Rd),
+  (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+  ".error \"should not emit\" ", []>;
+
+
//===----------------------------------------------------------------------===//
// template class for non-predicated alu32_2op instructions
// - aslh, asrh, sxtb, sxth, zxth
//===----------------------------------------------------------------------===//
let hasNewValue = 1, opNewValue = 0 in
class T_ALU32_2op <string mnemonic, bits<3> minOp> :
-   ALU32Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rs),
-   "$Rd = "#mnemonic#"($Rs)", [] > {
+   ALU32Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs),
+              "$Rd = "#mnemonic#"($Rs)", [] > {
  bits<5> Rd;
  bits<5> Rs;
@@ -703,13 +749,12 @@ class T_ALU32_2op <string mnemonic, bits<3> minOp> :
// template class for predicated alu32_2op instructions
// - aslh, asrh, sxtb, sxth, zxtb, zxth
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, validSubTargets = HasV4SubT,
-    hasNewValue = 1, opNewValue = 0 in
-class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot,
-                        bit isPredNew > :
-   ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs),
-   !if(isPredNot, "if (!$Pu", "if ($Pu")
-   #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> {
+let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot,
+                        bit isPredNew > :
+   ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs),
+              !if(isPredNot, "if (!$Pu", "if ($Pu")
+              #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> {
  bits<5> Rd;
  bits<2> Pu;
  bits<5> Rs;
@@ -741,20 +786,18 @@ multiclass ALU32_2op_base<string mnemonic, bits<3> minOp> {
    let isPredicable = 1, hasSideEffects = 0 in
    def A2_#NAME : T_ALU32_2op<mnemonic, minOp>;

-   let validSubTargets = HasV4SubT, isPredicated = 1, hasSideEffects = 0 in {
+   let isPredicated = 1, hasSideEffects = 0 in {
      defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
      defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
    }
  }
}

-let isCodeGenOnly = 0 in {
defm aslh : ALU32_2op_base<"aslh", 0b000>, PredNewRel;
defm asrh : ALU32_2op_base<"asrh", 0b001>, PredNewRel;
defm sxtb : ALU32_2op_base<"sxtb", 0b101>, PredNewRel;
defm sxth : ALU32_2op_base<"sxth", 0b111>, PredNewRel;
defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel;
-}
// Rd=zxtb(Rs): assembler mapped to Rd=and(Rs,#255).
// Compiler would want to generate 'zxtb' instead of 'and' because 'zxtb' has
@@ -784,14 +827,13 @@ multiclass ZXTB_base <string mnemonic, bits<3> minOp> {
    let isPredicable = 1, hasSideEffects = 0 in
    def A2_#NAME : T_ZXTB;

-   let validSubTargets = HasV4SubT, isPredicated = 1, hasSideEffects = 0 in {
+   let isPredicated = 1, hasSideEffects = 0 in {
      defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
      defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
    }
  }
}

-let isCodeGenOnly=0 in
defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel;

def: Pat<(shl I32:$src1, (i32 16)),   (A2_aslh I32:$src1)>;
@@ -799,44 +841,182 @@ def: Pat<(sra I32:$src1, (i32 16)),   (A2_asrh I32:$src1)>;
def: Pat<(sext_inreg I32:$src1, i8),  (A2_sxtb I32:$src1)>;
def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;

-// Mux.
-def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
-                                                       DoubleRegs:$src2,
-                                                       DoubleRegs:$src3),
-            "$dst = vmux($src1, $src2, $src3)",
-            []>;
+//===----------------------------------------------------------------------===//
+// Template class for vector add and avg
+//===----------------------------------------------------------------------===//
+class T_VectALU_64 <string opc, bits<3> majOp, bits<3> minOp,
+                    bit isSat, bit isRnd, bit isCrnd, bit SwapOps >
+  : ALU64_rr < (outs DoubleRegs:$Rdd),
+               (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+  "$Rdd = "#opc#"($Rss, $Rtt)"#!if(isRnd, ":rnd", "")
+                              #!if(isCrnd,":crnd","")
+                              #!if(isSat, ":sat", ""),
+  [], "", ALU64_tc_2_SLOT23 > {
+    bits<5> Rdd;
+    bits<5> Rss;
+    bits<5> Rtt;
+
+    let IClass = 0b1101;
+
+    let Inst{27-24} = 0b0011;
+    let Inst{23-21} = majOp;
+    let Inst{20-16} = !if (SwapOps, Rtt, Rss);
+    let Inst{12-8} = !if (SwapOps, Rss, Rtt);
+    let Inst{7-5} = minOp;
+    let Inst{4-0} = Rdd;
+  }
+
+// ALU64 - Vector add
+// Rdd=vadd[u][bhw](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+  def A2_vaddub : T_VectALU_64 < "vaddub", 0b000, 0b000, 0, 0, 0, 0>;
+  def A2_vaddh  : T_VectALU_64 < "vaddh",  0b000, 0b010, 0, 0, 0, 0>;
+  def A2_vaddw  : T_VectALU_64 < "vaddw",  0b000, 0b101, 0, 0, 0, 0>;
+}
+
+// Rdd=vadd[u][bhw](Rss,Rtt):sat
+let Defs = [USR_OVF] in {
+  def A2_vaddubs : T_VectALU_64 < "vaddub", 0b000, 0b001, 1, 0, 0, 0>;
+  def A2_vaddhs  : T_VectALU_64 < "vaddh",  0b000, 0b011, 1, 0, 0, 0>;
+  def A2_vadduhs : T_VectALU_64 < "vadduh", 0b000, 0b100, 1, 0, 0, 0>;
+  def A2_vaddws  : T_VectALU_64 < "vaddw",  0b000, 0b110, 1, 0, 0, 0>;
+}
+
+// ALU64 - Vector average
+// Rdd=vavg[u][bhw](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+  def A2_vavgub : T_VectALU_64 < "vavgub", 0b010, 0b000, 0, 0, 0, 0>;
+  def A2_vavgh  : T_VectALU_64 < "vavgh",  0b010, 0b010, 0, 0, 0, 0>;
+  def A2_vavguh : T_VectALU_64 < "vavguh", 0b010, 0b101, 0, 0, 0, 0>;
+  def A2_vavgw  : T_VectALU_64 < "vavgw",  0b011, 0b000, 0, 0, 0, 0>;
+  def A2_vavguw : T_VectALU_64 < "vavguw", 0b011, 0b011, 0, 0, 0, 0>;
+}
+
+// Rdd=vavg[u][bhw](Rss,Rtt)[:rnd|:crnd]
+def A2_vavgubr : T_VectALU_64 < "vavgub", 0b010, 0b001, 0, 1, 0, 0>;
+def A2_vavghr  : T_VectALU_64 < "vavgh",  0b010, 0b011, 0, 1, 0, 0>;
+def A2_vavghcr : T_VectALU_64 < "vavgh",  0b010, 0b100, 0, 0, 1, 0>;
+def A2_vavguhr : T_VectALU_64 < "vavguh", 0b010, 0b110, 0, 1, 0, 0>;
+
+def A2_vavgwr  : T_VectALU_64 < "vavgw",  0b011, 0b001, 0, 1, 0, 0>;
+def A2_vavgwcr : T_VectALU_64 < "vavgw",  0b011, 0b010, 0, 0, 1, 0>;
+def A2_vavguwr : T_VectALU_64 < "vavguw", 0b011, 0b100, 0, 1, 0, 0>;
+
+// Rdd=vnavg[bh](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+  def A2_vnavgh : T_VectALU_64 < "vnavgh", 0b100, 0b000, 0, 0, 0, 1>;
+  def A2_vnavgw : T_VectALU_64 < "vnavgw", 0b100, 0b011, 0, 0, 0, 1>;
+}
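For the averaging forms above, one 16-bit lane behaves roughly as follows: the
plain form truncates and :rnd adds one before the shift, while :crnd rounds the
half-way case toward even. This is my reading of the mnemonics, so treat it as
a sketch rather than a specification:

#include <stdint.h>
static int16_t vavgh_lane(int16_t a, int16_t b) {
    return (int16_t)(((int32_t)a + b) >> 1);      /* vavgh     */
}
static int16_t vavgh_rnd_lane(int16_t a, int16_t b) {
    return (int16_t)(((int32_t)a + b + 1) >> 1);  /* vavgh:rnd */
}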
T_VectALU_64 < "vnavgw", 0b100, 0b011, 0, 0, 0, 1>; +} + +// Rdd=vnavg[bh](Rss,Rtt)[:rnd|:crnd]:sat +let Defs = [USR_OVF] in { + def A2_vnavghr : T_VectALU_64 < "vnavgh", 0b100, 0b001, 1, 1, 0, 1>; + def A2_vnavghcr : T_VectALU_64 < "vnavgh", 0b100, 0b010, 1, 0, 1, 1>; + def A2_vnavgwr : T_VectALU_64 < "vnavgw", 0b100, 0b100, 1, 1, 0, 1>; + def A2_vnavgwcr : T_VectALU_64 < "vnavgw", 0b100, 0b110, 1, 0, 1, 1>; +} + +// Rdd=vsub[u][bh](Rss,Rtt) +let Itinerary = ALU64_tc_1_SLOT23 in { + def A2_vsubub : T_VectALU_64 < "vsubub", 0b001, 0b000, 0, 0, 0, 1>; + def A2_vsubh : T_VectALU_64 < "vsubh", 0b001, 0b010, 0, 0, 0, 1>; + def A2_vsubw : T_VectALU_64 < "vsubw", 0b001, 0b101, 0, 0, 0, 1>; +} + +// Rdd=vsub[u][bh](Rss,Rtt):sat +let Defs = [USR_OVF] in { + def A2_vsububs : T_VectALU_64 < "vsubub", 0b001, 0b001, 1, 0, 0, 1>; + def A2_vsubhs : T_VectALU_64 < "vsubh", 0b001, 0b011, 1, 0, 0, 1>; + def A2_vsubuhs : T_VectALU_64 < "vsubuh", 0b001, 0b100, 1, 0, 0, 1>; + def A2_vsubws : T_VectALU_64 < "vsubw", 0b001, 0b110, 1, 0, 0, 1>; +} + +// Rdd=vmax[u][bhw](Rss,Rtt) +def A2_vmaxb : T_VectALU_64 < "vmaxb", 0b110, 0b110, 0, 0, 0, 1>; +def A2_vmaxub : T_VectALU_64 < "vmaxub", 0b110, 0b000, 0, 0, 0, 1>; +def A2_vmaxh : T_VectALU_64 < "vmaxh", 0b110, 0b001, 0, 0, 0, 1>; +def A2_vmaxuh : T_VectALU_64 < "vmaxuh", 0b110, 0b010, 0, 0, 0, 1>; +def A2_vmaxw : T_VectALU_64 < "vmaxw", 0b110, 0b011, 0, 0, 0, 1>; +def A2_vmaxuw : T_VectALU_64 < "vmaxuw", 0b101, 0b101, 0, 0, 0, 1>; + +// Rdd=vmin[u][bhw](Rss,Rtt) +def A2_vminb : T_VectALU_64 < "vminb", 0b110, 0b111, 0, 0, 0, 1>; +def A2_vminub : T_VectALU_64 < "vminub", 0b101, 0b000, 0, 0, 0, 1>; +def A2_vminh : T_VectALU_64 < "vminh", 0b101, 0b001, 0, 0, 0, 1>; +def A2_vminuh : T_VectALU_64 < "vminuh", 0b101, 0b010, 0, 0, 0, 1>; +def A2_vminw : T_VectALU_64 < "vminw", 0b101, 0b011, 0, 0, 0, 1>; +def A2_vminuw : T_VectALU_64 < "vminuw", 0b101, 0b100, 0, 0, 0, 1>; //===----------------------------------------------------------------------===// -// ALU32/PERM - +// Template class for vector compare //===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_vcmp <string Str, bits<4> minOp> + : ALU64_rr <(outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Pd = "#Str#"($Rss, $Rtt)", [], + "", ALU64_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00100; + let Inst{13} = minOp{3}; + let Inst{7-5} = minOp{2-0}; + let Inst{1-0} = Pd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T> + : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))), + (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>; + +// Vector compare bytes +def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>; +def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>; + +// Vector compare halfwords +def A2_vcmpheq : T_vcmp <"vcmph.eq", 0b0011>; +def A2_vcmphgt : T_vcmp <"vcmph.gt", 0b0100>; +def A2_vcmphgtu : T_vcmp <"vcmph.gtu", 0b0101>; + +// Vector compare words +def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0b0000>; +def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>; +def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>; +def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>; +def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>; +def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>; +def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>; +def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>; +def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>; +def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>; +def: 
//===----------------------------------------------------------------------===//
-// ALU32/PRED +
+// ALU32/PERM -
//===----------------------------------------------------------------------===//

-// SDNode for converting immediate C to C-1.
-def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
-   // Return the byte immediate const-1 as an SDNode.
-   int32_t imm = N->getSExtValue();
-   return XformSToSM1Imm(imm);
-}]>;
-
-// SDNode for converting immediate C to C-1.
-def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
-   // Return the byte immediate const-1 as an SDNode.
-   uint32_t imm = N->getZExtValue();
-   return XformUToUM1Imm(imm);
-}]>;
-def CTLZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
-  "$dst = cl0($src1)",
-  [(set (i32 IntRegs:$dst), (i32 (trunc (ctlz (i64 DoubleRegs:$src1)))))]>;
+//===----------------------------------------------------------------------===//
+// ALU32/PRED +
+//===----------------------------------------------------------------------===//
+// No bits needed. If cmp.ge is found the assembler parser will
+// transform it to cmp.gt subtracting 1 from the immediate.
+let isPseudo = 1 in {
+def C2_cmpgei: ALU32Inst <
+  (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8Ext:$s8),
+  "$Pd = cmp.ge($Rs, #$s8)">;
+def C2_cmpgeui: ALU32Inst <
+  (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8Ext:$s8),
+  "$Pd = cmp.geu($Rs, #$s8)">;
+}

-def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
-  "$dst = ct0($src1)",
-  [(set (i32 IntRegs:$dst), (i32 (trunc (cttz (i64 DoubleRegs:$src1)))))]>;
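The rewrite described in the C2_cmpgei comment above rests on the usual integer
identity: for signed operands, r >= s is exactly r > s-1, provided the
decremented immediate still fits the encoding (and for the unsigned cmp.geu
form, provided the immediate is non-zero so s-1 does not wrap). A trivial C
check of the signed case:

#include <stdint.h>
static int cmp_ge(int32_t r, int32_t s)    { return r >= s; }
static int cmp_gt_m1(int32_t r, int32_t s) { return r > s - 1; }  /* same */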
//===----------------------------------------------------------------------===//
// ALU32/PRED -
@@ -845,7 +1025,8 @@ def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
//===----------------------------------------------------------------------===//
// ALU64/ALU +
-//===----------------------------------------------------------------------===//// Add.
+//===----------------------------------------------------------------------===//
+// Add.
//===----------------------------------------------------------------------===//
// Template Class
// Add/Subtract halfword
@@ -879,18 +1060,14 @@ class T_XTYPE_ADD_SUB <bits<2> LHbits, bit isSat, bit hasShift, bit isSub>
}

//Rd=sub(Rt.L,Rs.[LH])
-let isCodeGenOnly = 0 in {
def A2_subh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 1>;
def A2_subh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 1>;
-}

-let isCodeGenOnly = 0 in {
//Rd=add(Rt.L,Rs.[LH])
def A2_addh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 0>;
def A2_addh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 0>;
-}

-let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
//Rd=sub(Rt.L,Rs.[LH]):sat
def A2_subh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 1>;
def A2_subh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 1>;
@@ -901,22 +1078,18 @@ let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
}

//Rd=sub(Rt.[LH],Rs.[LH]):<<16
-let isCodeGenOnly = 0 in {
def A2_subh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 1>;
def A2_subh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 1>;
def A2_subh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 1>;
def A2_subh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 1>;
-}

//Rd=add(Rt.[LH],Rs.[LH]):<<16
-let isCodeGenOnly = 0 in {
def A2_addh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 0>;
def A2_addh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 0>;
def A2_addh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 0>;
def A2_addh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 0>;
-}

-let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
def A2_subh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 1>;
def A2_subh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 1>;
@@ -947,7 +1120,7 @@ def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
         (A2_subh_h16_ll I32:$src1, I32:$src2)>;

-let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+let hasSideEffects = 0, hasNewValue = 1 in
def S2_parityp: ALU64Inst<(outs IntRegs:$Rd),
                          (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
  "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
@@ -981,12 +1154,10 @@ class T_XTYPE_MIN_MAX < bit isMax, bit isUnsigned >
    let Inst{20-16} = !if(isMax, Rt, Rs);
  }

-let isCodeGenOnly = 0 in {
def A2_min  : T_XTYPE_MIN_MAX < 0, 0 >;
def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >;
def A2_max  : T_XTYPE_MIN_MAX < 1, 0 >;
def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >;
-}

// Here, depending on the operand being selected, we'll either generate a
// min or max instruction.
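That is, a compare whose only use feeds a select collapses into one of the
instructions above; sketched in C (helper names are mine):

#include <stdint.h>
static int32_t min_rr(int32_t a, int32_t b) { return a < b ? a : b; } /* A2_min */
static int32_t max_rr(int32_t a, int32_t b) { return a > b ? a : b; } /* A2_max */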
@@ -1053,11 +1224,9 @@ class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm>
  let Inst{1-0} = Pd;
}

-let isCodeGenOnly = 0 in {
def C2_cmpeqp  : T_cmp64_rr<"cmp.eq",  0b000, 1>;
def C2_cmpgtp  : T_cmp64_rr<"cmp.gt",  0b010, 0>;
def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>;
-}

class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
  : Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))),
@@ -1069,6 +1238,24 @@ def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
def: T_cmp64_rr_pat<C2_cmpgtp,  RevCmp<setlt>>;
def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;

+def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd),
+      (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+      "$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
+  let hasSideEffects = 0;
+
+  bits<5> Rd;
+  bits<2> Pu;
+  bits<5> Rs;
+  bits<5> Rt;
+
+  let IClass = 0b1101;
+  let Inst{27-24} = 0b0001;
+  let Inst{20-16} = Rs;
+  let Inst{12-8} = Rt;
+  let Inst{6-5} = Pu;
+  let Inst{4-0} = Rd;
+}
+
class T_ALU64_rr<string mnemonic, string suffix, bits<4> RegType,
                 bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm,
                 string Op2Pfx>
@@ -1096,10 +1283,8 @@ class T_ALU64_arith<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit IsSat,
  : T_ALU64_rr<mnemonic, !if(IsSat,":sat",""), 0b0011, MajOp, MinOp, OpsRev,
               IsComm, "">;

-let isCodeGenOnly = 0 in {
def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>;
def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>;
-}

def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;
@@ -1109,11 +1294,9 @@ class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm,
  : T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm,
               !if(IsNeg,"~","")>;

-let isCodeGenOnly = 0 in {
def A2_andp : T_ALU64_logical<"and", 0b000, 0, 1, 0>;
def A2_orp  : T_ALU64_logical<"or",  0b010, 0, 1, 0>;
def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>;
-}

def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
def: Pat<(i64 (or  I64:$Rs, I64:$Rt)), (A2_orp  I64:$Rs, I64:$Rt)>;
@@ -1165,11 +1348,9 @@ class T_LOGICAL_1OP<string MnOp, bits<2> OpBits>
  let Inst{1-0} = Pd;
}

-let isCodeGenOnly = 0 in {
def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>;
def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>;
def C2_not  : T_LOGICAL_1OP<"not",  0b10>;
-}

def: Pat<(i1 (not (i1 PredRegs:$Ps))),
         (C2_not PredRegs:$Ps)>;
@@ -1193,13 +1374,11 @@ class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev>
  let Inst{1-0} = Pd;
}

-let isCodeGenOnly = 0 in {
def C2_and  : T_LOGICAL_2OP<"and", 0b000, 0, 1>;
def C2_or   : T_LOGICAL_2OP<"or",  0b001, 0, 1>;
def C2_xor  : T_LOGICAL_2OP<"xor", 0b010, 0, 0>;
def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>;
def C2_orn  : T_LOGICAL_2OP<"or",  0b111, 1, 1>;
-}

def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and  I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (or  I1:$Ps, I1:$Pt)), (C2_or   I1:$Ps, I1:$Pt)>;
@@ -1207,7 +1386,7 @@ def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (or  I1:$Ps, (not I1:$Pt))), (C2_orn  I1:$Ps, I1:$Pt)>;

-let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+let hasSideEffects = 0, hasNewValue = 1 in
def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt),
  "$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> {
  bits<5> Rd;
@@ -1222,7 +1401,7 @@ def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt),
  let Inst{4-0} = Rd;
}

-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt),
  "$Rd = mask($Pt)", [], "", S_2op_tc_1_SLOT23> {
  bits<5> Rd;
@@ -1234,18 +1413,6 @@ def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt),
  let Inst{4-0} = Rd;
}

-def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
-                                                    DoubleRegs:$src2,
-                                                    PredRegs:$src3),
-  "$dst = valignb($src1, $src2, $src3)",
-  []>;
-
-def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
-                                                     DoubleRegs:$src2,
-                                                     PredRegs:$src3),
-  "$dst = vspliceb($src1, $src2, $src3)",
-  []>;
-
// User control register transfer.
//===----------------------------------------------------------------------===//
// CR -
@@ -1256,7 +1423,7 @@ def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
//===----------------------------------------------------------------------===//

def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
-                     [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+                     [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;

def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
@@ -1266,7 +1433,7 @@ class CondStr<string CReg, bit True, bit New> {
  string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
}
class JumpOpcStr<string Mnemonic, bit New, bit Taken> {
-  string S = Mnemonic # !if(New, !if(Taken,":t",":nt"), "");
+  string S = Mnemonic # !if(Taken, ":t", !if(New, ":nt", ""));
}

let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0,
@@ -1304,7 +1471,7 @@ class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr>

    let Inst{27-24} = 0b1100;
    let Inst{21} = PredNot;
-    let Inst{12} = !if(isPredNew, isTak, zero);
+    let Inst{12} = isTak;
    let Inst{11} = isPredNew;
    let Inst{9-8} = src;
    let Inst{23-22} = dst{16-15};
@@ -1314,7 +1481,7 @@ class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr>
}

multiclass JMP_Pred<bit PredNot, string ExtStr> {
-  def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>;
+  def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>;               // not taken
  // Predicate new
  def NAME#newpt : T_JMP_c<PredNot, 1, 1, ExtStr>;          // taken
  def NAME#new   : T_JMP_c<PredNot, 1, 0, ExtStr>;          // not taken
@@ -1361,13 +1528,13 @@ class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>
    let Inst{27-22} = 0b001101;
    let Inst{21} = PredNot;
    let Inst{20-16} = dst;
-    let Inst{12} = !if(isPredNew, isTak, zero);
+    let Inst{12} = isTak;
    let Inst{11} = isPredNew;
    let Inst{9-8} = src;
}

multiclass JMPR_Pred<bit PredNot> {
-  def NAME: T_JMPr_c<PredNot, 0, 0>;
+  def NAME : T_JMPr_c<PredNot, 0, 0>;                      // not taken
  // Predicate new
  def NAME#newpt : T_JMPr_c<PredNot, 1, 1>;                 // taken
  def NAME#new   : T_JMPr_c<PredNot, 1, 0>;                 // not taken
@@ -1404,12 +1571,12 @@ class JUMPR_MISC_CALLR<bit isPred, bit isPredNot,
}

-let Defs = VolatileV3.Regs, isCodeGenOnly = 0 in {
+let Defs = VolatileV3.Regs in {
  def J2_callrt : JUMPR_MISC_CALLR<1, 0, (ins PredRegs:$Pu, IntRegs:$Rs)>;
  def J2_callrf : JUMPR_MISC_CALLR<1, 1, (ins PredRegs:$Pu, IntRegs:$Rs)>;
}

-let isTerminator = 1, hasSideEffects = 0, isCodeGenOnly = 0 in {
+let isTerminator = 1, hasSideEffects = 0 in {
  defm J2_jump : JMP_base<"JMP", "">, PredNewRel;

  // Deal with explicit assembly
@@ -1451,6 +1618,8 @@ def: Pat<(brind (i32 IntRegs:$dst)),
//===----------------------------------------------------------------------===//
// LD +
//===----------------------------------------------------------------------===//
+
+// Load - Base with Immediate offset addressing mode
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1,
    AddedComplexity = 20 in
class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp,
                 Operand ImmOp>
@@ -1471,7 +1640,7 @@ class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp,
                    !if (!eq(ImmOpStr, "s11_2Ext"), 13,
                    !if (!eq(ImmOpStr, "s11_1Ext"), 12,
                                      /* s11_0Ext */ 11)));
-  let hasNewValue = !if (!eq(ImmOpStr, "s11_3Ext"), 0, 1);
+  let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);

  let IClass = 0b1001;
@@ -1542,60 +1711,100 @@ multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
  }
}

-let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess in {
  defm loadrb:  LD_Idxd <"memb",  "LDrib",  IntRegs, s11_0Ext, u6_0Ext, 0b1000>;
  defm loadrub: LD_Idxd <"memub", "LDriub", IntRegs, s11_0Ext, u6_0Ext, 0b1001>;
}

-let accessSize = HalfWordAccess, opExtentAlign = 1, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
  defm loadrh:  LD_Idxd <"memh",  "LDrih",  IntRegs, s11_1Ext, u6_1Ext, 0b1010>;
  defm loadruh: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext, 0b1011>;
}

-let accessSize = WordAccess, opExtentAlign = 2, isCodeGenOnly = 0 in
+let accessSize = WordAccess, opExtentAlign = 2 in
defm loadri: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext, 0b1100>;

-let accessSize = DoubleWordAccess, opExtentAlign = 3, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess, opExtentAlign = 3 in
defm loadrd: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext, 0b1110>;

-def : Pat < (i32 (sextloadi8 ADDRriS11_0:$addr)),
-            (L2_loadrb_io AddrFI:$addr, 0) >;
-
-def : Pat < (i32 (zextloadi8 ADDRriS11_0:$addr)),
-            (L2_loadrub_io AddrFI:$addr, 0) >;
-
-def : Pat < (i32 (sextloadi16 ADDRriS11_1:$addr)),
-            (L2_loadrh_io AddrFI:$addr, 0) >;
-
-def : Pat < (i32 (zextloadi16 ADDRriS11_1:$addr)),
-            (L2_loadruh_io AddrFI:$addr, 0) >;
-
-def : Pat < (i32 (load ADDRriS11_2:$addr)),
-            (L2_loadri_io AddrFI:$addr, 0) >;
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
+  def L2_loadbsw2_io: T_load_io<"membh",  IntRegs, 0b0001, s11_1Ext>;
+  def L2_loadbzw2_io: T_load_io<"memubh", IntRegs, 0b0011, s11_1Ext>;
+}

-def : Pat < (i64 (load ADDRriS11_3:$addr)),
-            (L2_loadrd_io AddrFI:$addr, 0) >;
+let accessSize = WordAccess, opExtentAlign = 2 in {
+  def L2_loadbzw4_io: T_load_io<"memubh", DoubleRegs, 0b0101, s11_2Ext>;
+  def L2_loadbsw4_io: T_load_io<"membh",  DoubleRegs, 0b0111, s11_2Ext>;
+}

-let AddedComplexity = 20 in {
-def : Pat < (i32 (sextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
-            (L2_loadrb_io IntRegs:$src1, s11_0ExtPred:$offset) >;
+let addrMode = BaseImmOffset, isExtendable = 1, hasSideEffects = 0,
+    opExtendable = 3, isExtentSigned = 1 in
+class T_loadalign_io <string str, bits<4> MajOp, Operand ImmOp>
+  : LDInst<(outs DoubleRegs:$dst),
+           (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+  "$dst = "#str#"($src2 + #$offset)", [],
+  "$src1 = $dst">, AddrModeRel {
+    bits<4> name;
+    bits<5> dst;
+    bits<5> src2;
+    bits<12> offset;
+    bits<11> offsetBits;

-def : Pat < (i32 (zextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
-            (L2_loadrub_io IntRegs:$src1, s11_0ExtPred:$offset) >;
+    let offsetBits = !if (!eq(!cast<string>(ImmOp), "s11_1Ext"), offset{11-1},
+                                             /* s11_0Ext */      offset{10-0});
+    let IClass = 0b1001;

-def : Pat < (i32 (sextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
-            (L2_loadrh_io IntRegs:$src1, s11_1ExtPred:$offset) >;
+    let Inst{27} = 0b0;
+    let Inst{26-25} = offsetBits{10-9};
+    let Inst{24-21} = MajOp;
+    let Inst{20-16} = src2;
+    let Inst{13-5} = offsetBits{8-0};
+    let Inst{4-0} = dst;
+  }

-def : Pat < (i32 (zextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
-            (L2_loadruh_io IntRegs:$src1, s11_1ExtPred:$offset) >;
+let accessSize = HalfWordAccess, opExtentBits = 12, opExtentAlign = 1 in
+def L2_loadalignh_io: T_loadalign_io <"memh_fifo", 0b0010, s11_1Ext>;

-def : Pat < (i32 (load (add IntRegs:$src1, s11_2ExtPred:$offset))),
-            (L2_loadri_io IntRegs:$src1, s11_2ExtPred:$offset) >;
+let accessSize = ByteAccess, opExtentBits = 11 in
+def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>;

-def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))),
-            (L2_loadrd_io IntRegs:$src1, s11_3ExtPred:$offset) >;
+// Patterns to select load-indexed (i.e. load from base+offset).
+multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
+                     InstHexagon MI> {
+  def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
+  def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
+           (VT (MI AddrFI:$fi, imm:$Off))>;
+  def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))),
+           (VT (MI IntRegs:$Rs, imm:$Off))>;
+  def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>;
+}

+let AddedComplexity = 20 in {
+  defm: Loadx_pat<load,           i32, s30_2ImmPred, L2_loadri_io>;
+  defm: Loadx_pat<load,           i64, s29_3ImmPred, L2_loadrd_io>;
+  defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
+  defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
+  defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
+  defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;
+
+  defm: Loadx_pat<extloadi1,   i32, s32_0ImmPred, L2_loadrub_io>;
+  defm: Loadx_pat<extloadi8,   i32, s32_0ImmPred, L2_loadrub_io>;
+  defm: Loadx_pat<extloadi16,  i32, s31_1ImmPred, L2_loadruh_io>;
+  defm: Loadx_pat<sextloadi8,  i32, s32_0ImmPred, L2_loadrb_io>;
+  defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>;
+  defm: Loadx_pat<zextloadi1,  i32, s32_0ImmPred, L2_loadrub_io>;
+  defm: Loadx_pat<zextloadi8,  i32, s32_0ImmPred, L2_loadrub_io>;
+  defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+  // No sextloadi1.
+}
+
+// Sign-extending loads of i1 need to replicate the lowest bit throughout
+// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
+// do the trick.
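A quick check of that 0-v trick in C (the byte load zero-extends, so v can
only be 0 or 1):

#include <stdint.h>
static int32_t sext_i1(uint32_t v) {
    return (int32_t)(0u - (v & 1u));  /* 0 -> 0x00000000, 1 -> 0xFFFFFFFF */
}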
+let AddedComplexity = 20 in
+def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))),
+         (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;
+
//===----------------------------------------------------------------------===//
// Post increment load
//===----------------------------------------------------------------------===//
@@ -1696,51 +1905,70 @@ multiclass LD_PostInc <string mnemonic, string BaseOp, RegisterClass RC,
  }

// post increment byte loads with immediate offset
-let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess in {
  defm loadrb  : LD_PostInc <"memb",  "LDrib",  IntRegs, s4_0Imm, 0b1000>;
  defm loadrub : LD_PostInc <"memub", "LDriub", IntRegs, s4_0Imm, 0b1001>;
}

// post increment halfword loads with immediate offset
-let accessSize = HalfWordAccess, opExtentAlign = 1, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
  defm loadrh  : LD_PostInc <"memh",  "LDrih",  IntRegs, s4_1Imm, 0b1010>;
  defm loadruh : LD_PostInc <"memuh", "LDriuh", IntRegs, s4_1Imm, 0b1011>;
}

// post increment word loads with immediate offset
-let accessSize = WordAccess, opExtentAlign = 2, isCodeGenOnly = 0 in
+let accessSize = WordAccess, opExtentAlign = 2 in
defm loadri : LD_PostInc <"memw", "LDriw", IntRegs, s4_2Imm, 0b1100>;

// post increment doubleword loads with immediate offset
-let accessSize = DoubleWordAccess, opExtentAlign = 3, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess, opExtentAlign = 3 in
defm loadrd : LD_PostInc <"memd", "LDrid", DoubleRegs, s4_3Imm, 0b1110>;

-def : Pat< (i32 (extloadi1 ADDRriS11_0:$addr)),
-           (i32 (L2_loadrb_io AddrFI:$addr, 0)) >;
-
-// Load byte any-extend.
-def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)),
-            (i32 (L2_loadrb_io AddrFI:$addr, 0)) >;
+// Rd=memb[u]h(Rx++#s4:1)
+// Rdd=memb[u]h(Rx++#s4:2)
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
+  def L2_loadbsw2_pi : T_load_pi <"membh",  IntRegs, s4_1Imm, 0b0001>;
+  def L2_loadbzw2_pi : T_load_pi <"memubh", IntRegs, s4_1Imm, 0b0011>;
+}
+let accessSize = WordAccess, opExtentAlign = 2, hasNewValue = 0 in {
+  def L2_loadbsw4_pi : T_load_pi <"membh",  DoubleRegs, s4_2Imm, 0b0111>;
+  def L2_loadbzw4_pi : T_load_pi <"memubh", DoubleRegs, s4_2Imm, 0b0101>;
+}

-// Indexed load byte any-extend.
-let AddedComplexity = 20 in
-def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
-            (i32 (L2_loadrb_io IntRegs:$src1, s11_0ImmPred:$offset)) >;
+//===----------------------------------------------------------------------===//
+// Template class for post increment fifo loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, addrMode = PostInc in
+class T_loadalign_pi <string mnemonic, Operand ImmOp, bits<4> MajOp >
+  : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$dst2),
+              (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+  "$dst = "#mnemonic#"($src2++#$offset)" ,
+  [], "$src2 = $dst2, $src1 = $dst" > ,
+  PredNewRel {
+    bits<5> dst;
+    bits<5> src2;
+    bits<5> offset;
+    bits<4> offsetBits;

-def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)),
-            (i32 (L2_loadrh_io AddrFI:$addr, 0))>;
+    let offsetBits = !if (!eq(!cast<string>(ImmOp), "s4_1Imm"), offset{4-1},
+                                             /* s4_0Imm */      offset{3-0});
+    let IClass = 0b1001;

-let AddedComplexity = 20 in
-def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))),
-            (i32 (L2_loadrh_io IntRegs:$src1, s11_1ImmPred:$offset)) >;
+    let Inst{27-25} = 0b101;
+    let Inst{24-21} = MajOp;
+    let Inst{20-16} = src2;
+    let Inst{13-12} = 0b00;
+    let Inst{8-5} = offsetBits;
+    let Inst{4-0} = dst;
+  }

-let AddedComplexity = 10 in
-def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)),
-            (i32 (L2_loadrub_io AddrFI:$addr, 0))>;
+// Ryy=memh_fifo(Rx++#s4:1)
+// Ryy=memb_fifo(Rx++#s4:0)
+let accessSize = ByteAccess in
+def L2_loadalignb_pi : T_loadalign_pi <"memb_fifo", s4_0Imm, 0b0100>;

-let AddedComplexity = 20 in
-def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))),
-            (i32 (L2_loadrub_io IntRegs:$src1, s11_0ImmPred:$offset))>;
+let accessSize = HalfWordAccess, opExtentAlign = 1 in
+def L2_loadalignh_pi : T_loadalign_pi <"memh_fifo", s4_1Imm, 0b0010>;

//===----------------------------------------------------------------------===//
// Template class for post increment loads with register offset.
@@ -1768,26 +1996,27 @@ class T_load_pr <string mnemonic, RegisterClass RC, bits<4> MajOp,
  let Inst{4-0} = dst;
}

-let hasNewValue = 1, isCodeGenOnly = 0 in {
+let hasNewValue = 1 in {
  def L2_loadrb_pr  : T_load_pr <"memb",  IntRegs, 0b1000, ByteAccess>;
  def L2_loadrub_pr : T_load_pr <"memub", IntRegs, 0b1001, ByteAccess>;
  def L2_loadrh_pr  : T_load_pr <"memh",  IntRegs, 0b1010, HalfWordAccess>;
  def L2_loadruh_pr : T_load_pr <"memuh", IntRegs, 0b1011, HalfWordAccess>;
  def L2_loadri_pr  : T_load_pr <"memw",  IntRegs, 0b1100, WordAccess>;
+
+  def L2_loadbzw2_pr : T_load_pr <"memubh", IntRegs, 0b0011, HalfWordAccess>;
}

-let isCodeGenOnly = 0 in
def L2_loadrd_pr : T_load_pr <"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
+def L2_loadbzw4_pr : T_load_pr <"memubh", DoubleRegs, 0b0101, WordAccess>;

// Load predicate.
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
-isPseudo = 1, Defs = [R10,R11,D5], hasSideEffects = 0 in
-def LDriw_pred : LDInst2<(outs PredRegs:$dst),
-                         (ins MEMri:$addr),
-  "Error; should not emit",
-  []>;
+    isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def LDriw_pred : LDInst<(outs PredRegs:$dst),
+                        (ins IntRegs:$addr, s11_2Ext:$off),
+  ".error \"should not emit\"", []>;

-let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0, isCodeGenOnly = 0 in
+let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0 in
def L2_deallocframe : LDInst<(outs), (ins),
  "deallocframe", []> {
@@ -1799,7 +2028,7 @@ let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0, isCodeGenOnly = 0
}

// Load / Post increment circular addressing mode.
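A simplified model of the circular post-increment these loads perform, with
the modifier-register encoding details omitted (assumed semantics, sketch
only): the pointer advances by the increment and wraps within a buffer.

static unsigned char *circ_step(unsigned char *base, unsigned size,
                                unsigned char *p, int inc) {
    long off = (p - base) + inc;  /* advance ...                   */
    off %= (long)size;            /* ... and wrap inside the buffer */
    if (off < 0)
        off += size;
    return base + off;
}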
-let Uses = [CS], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+let Uses = [CS], hasSideEffects = 0 in
class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
  : LDInst <(outs RC:$dst, IntRegs:$_dst_),
            (ins IntRegs:$Rz, ModRegs:$Mu),
@@ -1809,6 +2038,7 @@ class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
  bits<5> Rz;
  bit Mu;

+  let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
  let IClass = 0b1001;

  let Inst{27-25} = 0b100;
@@ -1821,27 +2051,60 @@ class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
  let Inst{4-0} = dst;
}

-let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess in {
  def L2_loadrb_pcr  : T_load_pcr <"memb",  IntRegs, 0b1000>;
  def L2_loadrub_pcr : T_load_pcr <"memub", IntRegs, 0b1001>;
}

-let accessSize = HalfWordAccess, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess in {
  def L2_loadrh_pcr   : T_load_pcr <"memh",   IntRegs, 0b1010>;
  def L2_loadruh_pcr  : T_load_pcr <"memuh",  IntRegs, 0b1011>;
+  def L2_loadbsw2_pcr : T_load_pcr <"membh",  IntRegs, 0b0001>;
+  def L2_loadbzw2_pcr : T_load_pcr <"memubh", IntRegs, 0b0011>;
}

-let accessSize = WordAccess, isCodeGenOnly = 0 in {
+let accessSize = WordAccess in {
  def L2_loadri_pcr : T_load_pcr <"memw", IntRegs, 0b1100>;
+  let hasNewValue = 0 in {
+    def L2_loadbzw4_pcr : T_load_pcr <"memubh", DoubleRegs, 0b0101>;
+    def L2_loadbsw4_pcr : T_load_pcr <"membh",  DoubleRegs, 0b0111>;
+  }
}

-let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess in
def L2_loadrd_pcr : T_load_pcr <"memd", DoubleRegs, 0b1110>;

+// Load / Post increment circular addressing mode.
+let Uses = [CS], hasSideEffects = 0 in
+class T_loadalign_pcr<string mnemonic, bits<4> MajOp, MemAccessSize AccessSz >
+  : LDInst <(outs DoubleRegs:$dst, IntRegs:$_dst_),
+            (ins DoubleRegs:$_src_, IntRegs:$Rz, ModRegs:$Mu),
+  "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [],
+  "$Rz = $_dst_, $dst = $_src_" > {
+    bits<5> dst;
+    bits<5> Rz;
+    bit Mu;
+
+    let accessSize = AccessSz;
+    let IClass = 0b1001;
+
+    let Inst{27-25} = 0b100;
+    let Inst{24-21} = MajOp;
+    let Inst{20-16} = Rz;
+    let Inst{13} = Mu;
+    let Inst{12} = 0b0;
+    let Inst{9} = 0b1;
+    let Inst{7} = 0b0;
+    let Inst{4-0} = dst;
+  }
+
+def L2_loadalignb_pcr : T_loadalign_pcr <"memb_fifo", 0b0100, ByteAccess>;
+def L2_loadalignh_pcr : T_loadalign_pcr <"memh_fifo", 0b0010, HalfWordAccess>;
+
//===----------------------------------------------------------------------===//
// Circular loads with immediate offset.
//===----------------------------------------------------------------------===//
-let Uses = [CS], mayLoad = 1, hasSideEffects = 0, hasNewValue = 1 in
+let Uses = [CS], mayLoad = 1, hasSideEffects = 0 in
class T_load_pci <string mnemonic, RegisterClass RC,
                  Operand ImmOp, bits<4> MajOp>
  : LDInstPI<(outs RC:$dst, IntRegs:$_dst_),
@@ -1855,6 +2118,7 @@ class T_load_pci <string mnemonic, RegisterClass RC,
  bits<4> offsetBits;
  string ImmOpStr = !cast<string>(ImmOp);

+  let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
  let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
                   !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
                   !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
@@ -1871,24 +2135,62 @@ class T_load_pci <string mnemonic, RegisterClass RC,
}

// Byte variants of circ load
-let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess in {
  def L2_loadrb_pci  : T_load_pci <"memb",  IntRegs, s4_0Imm, 0b1000>;
  def L2_loadrub_pci : T_load_pci <"memub", IntRegs, s4_0Imm, 0b1001>;
}

// Half word variants of circ load
-let accessSize = HalfWordAccess, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess in {
  def L2_loadrh_pci   : T_load_pci <"memh",   IntRegs, s4_1Imm, 0b1010>;
  def L2_loadruh_pci  : T_load_pci <"memuh",  IntRegs, s4_1Imm, 0b1011>;
+  def L2_loadbzw2_pci : T_load_pci <"memubh", IntRegs, s4_1Imm, 0b0011>;
+  def L2_loadbsw2_pci : T_load_pci <"membh",  IntRegs, s4_1Imm, 0b0001>;
}

// Word variants of circ load
-let accessSize = WordAccess, isCodeGenOnly = 0 in
+let accessSize = WordAccess in
def L2_loadri_pci : T_load_pci <"memw", IntRegs, s4_2Imm, 0b1100>;

-let accessSize = DoubleWordAccess, hasNewValue = 0, isCodeGenOnly = 0 in
+let accessSize = WordAccess, hasNewValue = 0 in {
+  def L2_loadbzw4_pci : T_load_pci <"memubh", DoubleRegs, s4_2Imm, 0b0101>;
+  def L2_loadbsw4_pci : T_load_pci <"membh",  DoubleRegs, s4_2Imm, 0b0111>;
+}
+
+let accessSize = DoubleWordAccess, hasNewValue = 0 in
def L2_loadrd_pci : T_load_pci <"memd", DoubleRegs, s4_3Imm, 0b1110>;

+//===----------------------------------------------------------------------===//
+// Circular loads - Pseudo
+//
+// Please note that the input operand order in the pseudo instructions
+// doesn't match that of the real instructions. The pseudo instruction
+// operand order should mimic the ordering in the intrinsics. Also, 'src2'
+// doesn't appear in the AsmString because it's the same as 'dst'.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_load_pci_pseudo <string opc, RegisterClass RC>
+  : LDInstPI<(outs IntRegs:$_dst_, RC:$dst),
+             (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4Imm:$src4),
+  ".error \"$dst = "#opc#"($src1++#$src4:circ($src3))\"",
+  [], "$src1 = $_dst_">;
+
+def L2_loadrb_pci_pseudo  : T_load_pci_pseudo <"memb",  IntRegs>;
+def L2_loadrub_pci_pseudo : T_load_pci_pseudo <"memub", IntRegs>;
+def L2_loadrh_pci_pseudo  : T_load_pci_pseudo <"memh",  IntRegs>;
+def L2_loadruh_pci_pseudo : T_load_pci_pseudo <"memuh", IntRegs>;
+def L2_loadri_pci_pseudo  : T_load_pci_pseudo <"memw",  IntRegs>;
+def L2_loadrd_pci_pseudo  : T_load_pci_pseudo <"memd",  DoubleRegs>;
+
+
+// TODO: memb_fifo and memh_fifo must take the destination register as input.
+// One-off circ loads - not enough in common to break into a class.
+let accessSize = ByteAccess in
+def L2_loadalignb_pci : T_load_pci <"memb_fifo", DoubleRegs, s4_0Imm, 0b0100>;
+
+let accessSize = HalfWordAccess, opExtentAlign = 1 in
+def L2_loadalignh_pci : T_load_pci <"memh_fifo", DoubleRegs, s4_1Imm, 0b0010>;
+
// L[24]_load[wd]_locked: Load word/double with lock.
let isSoloAX = 1 in
class T_load_locked <string mnemonic, RegisterClass RC>
@@ -1901,12 +2203,38 @@ class T_load_locked <string mnemonic, RegisterClass RC>
    let Inst{27-21} = 0b0010000;
    let Inst{20-16} = src;
    let Inst{13-12} = !if (!eq(mnemonic, "memd_locked"), 0b01, 0b00);
+    let Inst{5} = 0;
    let Inst{4-0} = dst;
  }

-let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0, isCodeGenOnly = 0 in
+let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0 in
def L2_loadw_locked : T_load_locked <"memw_locked", IntRegs>;

-let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess in
def L4_loadd_locked : T_load_locked <"memd_locked", DoubleRegs>;

+// S[24]_store[wd]_locked: Store word/double conditionally.
+let isSoloAX = 1, isPredicateLate = 1 in
+class T_store_locked <string mnemonic, RegisterClass RC>
+  : ST0Inst <(outs PredRegs:$Pd), (ins IntRegs:$Rs, RC:$Rt),
+    mnemonic#"($Rs, $Pd) = $Rt"> {
+  bits<2> Pd;
+  bits<5> Rs;
+  bits<5> Rt;
+
+  let IClass = 0b1010;
+  let Inst{27-23} = 0b00001;
+  let Inst{22} = !if (!eq(mnemonic, "memw_locked"), 0b0, 0b1);
+  let Inst{21} = 0b1;
+  let Inst{20-16} = Rs;
+  let Inst{12-8} = Rt;
+  let Inst{1-0} = Pd;
+}
+
+let accessSize = WordAccess in
+def S2_storew_locked : T_store_locked <"memw_locked", IntRegs>;
+
+let accessSize = DoubleWordAccess in
+def S4_stored_locked : T_store_locked <"memd_locked", DoubleRegs>;
+
//===----------------------------------------------------------------------===//
// Bit-reversed loads with auto-increment register
//===----------------------------------------------------------------------===//
@@ -1936,17 +2264,44 @@ class T_load_pbr<string mnemonic, RegisterClass RC,
  let Inst{4-0} = dst;
}

-let hasNewValue =1, opNewValue = 0, isCodeGenOnly = 0 in {
+let hasNewValue =1, opNewValue = 0 in {
  def L2_loadrb_pbr   : T_load_pbr <"memb",   IntRegs, ByteAccess, 0b1000>;
  def L2_loadrub_pbr  : T_load_pbr <"memub",  IntRegs, ByteAccess, 0b1001>;
  def L2_loadrh_pbr   : T_load_pbr <"memh",   IntRegs, HalfWordAccess, 0b1010>;
  def L2_loadruh_pbr  : T_load_pbr <"memuh",  IntRegs, HalfWordAccess, 0b1011>;
+  def L2_loadbsw2_pbr : T_load_pbr <"membh",  IntRegs, HalfWordAccess, 0b0001>;
+  def L2_loadbzw2_pbr : T_load_pbr <"memubh", IntRegs, HalfWordAccess, 0b0011>;
  def L2_loadri_pbr   : T_load_pbr <"memw",   IntRegs, WordAccess, 0b1100>;
}

-let isCodeGenOnly = 0 in
+def L2_loadbzw4_pbr : T_load_pbr <"memubh", DoubleRegs, WordAccess, 0b0101>;
+def L2_loadbsw4_pbr : T_load_pbr <"membh",  DoubleRegs, WordAccess, 0b0111>;
def L2_loadrd_pbr : T_load_pbr <"memd", DoubleRegs, DoubleWordAccess, 0b1110>;

+def L2_loadalignb_pbr :T_load_pbr <"memb_fifo", DoubleRegs, ByteAccess, 0b0100>;
+def L2_loadalignh_pbr :T_load_pbr <"memh_fifo", DoubleRegs,
+                                   HalfWordAccess, 0b0010>;
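The :brev update applies the increment to a bit-reversed view of the pointer's
low bits, which produces the interleaved access order FFT kernels want. A
sketch of the reversal itself (a 16-bit width is chosen purely for
illustration):

#include <stdint.h>
static uint32_t brev16(uint32_t x) {
    uint32_t r = 0;
    for (int i = 0; i < 16; i++)          /* reverse the low 16 bits */
        r |= ((x >> i) & 1u) << (15 - i);
    return r;
}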
+
+//===----------------------------------------------------------------------===//
+// Bit-reversed loads - Pseudo
+//
+// Please note that 'src2' doesn't appear in the AsmString because
+// it's the same as 'dst'.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_load_pbr_pseudo <string opc, RegisterClass RC>
+  : LDInstPI<(outs IntRegs:$_dst_, RC:$dst),
+             (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+  ".error \"$dst = "#opc#"($src1++$src3:brev)\"",
+  [], "$src1 = $_dst_">;
+
+def L2_loadrb_pbr_pseudo  : T_load_pbr_pseudo <"memb",  IntRegs>;
+def L2_loadrub_pbr_pseudo : T_load_pbr_pseudo <"memub", IntRegs>;
+def L2_loadrh_pbr_pseudo  : T_load_pbr_pseudo <"memh",  IntRegs>;
+def L2_loadruh_pbr_pseudo : T_load_pbr_pseudo <"memuh", IntRegs>;
+def L2_loadri_pbr_pseudo  : T_load_pbr_pseudo <"memw",  IntRegs>;
+def L2_loadrd_pbr_pseudo  : T_load_pbr_pseudo <"memd",  DoubleRegs>;
+
//===----------------------------------------------------------------------===//
// LD -
//===----------------------------------------------------------------------===//
@@ -2003,7 +2358,6 @@ class T_M2_mpy < bits<2> LHbits, bit isSat, bit isRnd,
}

//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpy_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 0>;
def M2_mpy_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 0>;
def M2_mpy_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 0>;
@@ -2012,10 +2366,8 @@ def M2_mpy_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 0>;
def M2_mpy_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 0>;
def M2_mpy_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 0>;
def M2_mpy_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 0>;
-}

//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpyu_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 1>;
def M2_mpyu_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 1>;
def M2_mpyu_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 1>;
@@ -2024,10 +2376,8 @@ def M2_mpyu_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 1>;
def M2_mpyu_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 1>;
def M2_mpyu_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 1>;
def M2_mpyu_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 1>;
-}

//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]:rnd
-let isCodeGenOnly = 0 in {
def M2_mpy_rnd_ll_s1: T_M2_mpy <0b00, 0, 1, 1, 0>;
def M2_mpy_rnd_ll_s0: T_M2_mpy <0b00, 0, 1, 0, 0>;
def M2_mpy_rnd_lh_s1: T_M2_mpy <0b01, 0, 1, 1, 0>;
@@ -2036,11 +2386,10 @@ def M2_mpy_rnd_hl_s1: T_M2_mpy <0b10, 0, 1, 1, 0>;
def M2_mpy_rnd_hl_s0: T_M2_mpy <0b10, 0, 1, 0, 0>;
def M2_mpy_rnd_hh_s1: T_M2_mpy <0b11, 0, 1, 1, 0>;
def M2_mpy_rnd_hh_s0: T_M2_mpy <0b11, 0, 1, 0, 0>;
-}

//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:sat]
//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
-let Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Defs = [USR_OVF] in {
  def M2_mpy_sat_ll_s1: T_M2_mpy <0b00, 1, 0, 1, 0>;
  def M2_mpy_sat_ll_s0: T_M2_mpy <0b00, 1, 0, 0, 0>;
  def M2_mpy_sat_lh_s1: T_M2_mpy <0b01, 1, 0, 1, 0>;
@@ -2094,7 +2443,6 @@ class T_M2_mpy_acc < bits<2> LHbits, bit isSat, bit isNac,
}

//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpy_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 0>;
def M2_mpy_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 0>;
def M2_mpy_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 0>;
@@ -2103,10 +2451,8 @@ def M2_mpy_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 0>;
def M2_mpy_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 0>;
def M2_mpy_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 0>;
def M2_mpy_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 0>;
-}

//Rx += mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpyu_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 1>;
def M2_mpyu_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 1>;
def M2_mpyu_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 1>;
@@ -2115,10 +2461,8 @@ def M2_mpyu_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 1>;
def M2_mpyu_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 1>;
def M2_mpyu_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 1>;
def M2_mpyu_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 1>;
-}

//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpy_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 0>;
def M2_mpy_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 0>;
def M2_mpy_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 0>;
@@ -2127,10 +2471,8 @@ def M2_mpy_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 0>;
def M2_mpy_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 0>;
def M2_mpy_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 0>;
def M2_mpy_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 0>;
-}

//Rx -= mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpyu_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 1>;
def M2_mpyu_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 1>;
def M2_mpyu_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 1>;
@@ -2139,10 +2481,8 @@ def M2_mpyu_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 1>;
def M2_mpyu_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 1>;
def M2_mpyu_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 1>;
def M2_mpyu_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 1>;
-}

//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
-let isCodeGenOnly = 0 in {
def M2_mpy_acc_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 0, 1, 0>;
def M2_mpy_acc_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 0, 0, 0>;
def M2_mpy_acc_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 0, 1, 0>;
@@ -2151,10 +2491,8 @@ def M2_mpy_acc_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 0, 1, 0>;
def M2_mpy_acc_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 0, 0, 0>;
def M2_mpy_acc_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 0, 1, 0>;
def M2_mpy_acc_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 0, 0, 0>;
-}

//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
-let isCodeGenOnly = 0 in {
def M2_mpy_nac_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 1, 1, 0>;
def M2_mpy_nac_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 1, 0, 0>;
def M2_mpy_nac_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 1, 1, 0>;
@@ -2163,7 +2501,6 @@ def M2_mpy_nac_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 1, 1, 0>;
def M2_mpy_nac_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 1, 0, 0>;
def M2_mpy_nac_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 1, 1, 0>;
def M2_mpy_nac_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 1, 0, 0>;
-}

//===----------------------------------------------------------------------===//
// Template Class
@@ -2197,7 +2534,6 @@ class T_M2_mpyd_acc < bits<2> LHbits, bit isNac, bit hasShift, bit isUnsigned>
  let Inst{12-8} = Rt;
}

-let isCodeGenOnly = 0 in {
def M2_mpyd_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 0>;
def M2_mpyd_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 0>;
def M2_mpyd_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 0>;
@@ -2237,6 +2573,72 @@ def M2_mpyud_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 1>;
def M2_mpyud_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 1>;
def M2_mpyud_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 1>;
def M2_mpyud_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 1>;
+
+//===----------------------------------------------------------------------===//
+// Template Class -- Vector Multiply
+// Used for complex multiply real or imaginary, dual multiply and
+// even halfwords.
+//===----------------------------------------------------------------------===//
+class T_M2_vmpy < string opc, bits<3> MajOp, bits<3> MinOp, bit hasShift,
+                  bit isRnd, bit isSat >
+  : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+  "$Rdd = "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
+                              #!if(isRnd,":rnd","")
+                              #!if(isSat,":sat",""),
+  [] > {
+    bits<5> Rdd;
+    bits<5> Rss;
+    bits<5> Rtt;
+
+    let IClass = 0b1110;
+
+    let Inst{27-24} = 0b1000;
+    let Inst{23-21} = MajOp;
+    let Inst{7-5} = MinOp;
+    let Inst{4-0} = Rdd;
+    let Inst{20-16} = Rss;
+    let Inst{12-8} = Rtt;
+  }
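For the half-register multiplies defined above, the _ll/_lh/_hl/_hh suffixes
select which 16-bit half of each source feeds the multiplier, and _s1 shifts
the product left by one. A scalar C model of one variant (sketch, helper name
is mine):

#include <stdint.h>
static int32_t mpy_hl_s1(int32_t rs, int32_t rt) {
    int16_t hs = (int16_t)(rs >> 16);     /* Rs.H */
    int16_t lt = (int16_t)(rt & 0xFFFF);  /* Rt.L */
    return ((int32_t)hs * lt) << 1;       /* :<<1 */
}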
+// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat
+let Defs = [USR_OVF] in {
+def M2_vcmpy_s1_sat_i: T_M2_vmpy <"vcmpyi", 0b110, 0b110, 1, 0, 1>;
+def M2_vcmpy_s0_sat_i: T_M2_vmpy <"vcmpyi", 0b010, 0b110, 0, 0, 1>;
+
+// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat
+def M2_vcmpy_s1_sat_r: T_M2_vmpy <"vcmpyr", 0b101, 0b110, 1, 0, 1>;
+def M2_vcmpy_s0_sat_r: T_M2_vmpy <"vcmpyr", 0b001, 0b110, 0, 0, 1>;
+
+// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
+def M2_vdmpys_s1: T_M2_vmpy <"vdmpy", 0b100, 0b100, 1, 0, 1>;
+def M2_vdmpys_s0: T_M2_vmpy <"vdmpy", 0b000, 0b100, 0, 0, 1>;
+
+// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
+def M2_vmpy2es_s1: T_M2_vmpy <"vmpyeh", 0b100, 0b110, 1, 0, 1>;
+def M2_vmpy2es_s0: T_M2_vmpy <"vmpyeh", 0b000, 0b110, 0, 0, 1>;
+
+//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyh_s0:  T_M2_vmpy <"vmpywoh", 0b000, 0b111, 0, 0, 1>;
+def M2_mmpyh_s1:  T_M2_vmpy <"vmpywoh", 0b100, 0b111, 1, 0, 1>;
+def M2_mmpyh_rs0: T_M2_vmpy <"vmpywoh", 0b001, 0b111, 0, 1, 1>;
+def M2_mmpyh_rs1: T_M2_vmpy <"vmpywoh", 0b101, 0b111, 1, 1, 1>;
+
+//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyl_s0:  T_M2_vmpy <"vmpyweh", 0b000, 0b101, 0, 0, 1>;
+def M2_mmpyl_s1:  T_M2_vmpy <"vmpyweh", 0b100, 0b101, 1, 0, 1>;
+def M2_mmpyl_rs0: T_M2_vmpy <"vmpyweh", 0b001, 0b101, 0, 1, 1>;
+def M2_mmpyl_rs1: T_M2_vmpy <"vmpyweh", 0b101, 0b101, 1, 1, 1>;
+
+//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyuh_s0:  T_M2_vmpy <"vmpywouh", 0b010, 0b111, 0, 0, 1>;
+def M2_mmpyuh_s1:  T_M2_vmpy <"vmpywouh", 0b110, 0b111, 1, 0, 1>;
+def M2_mmpyuh_rs0: T_M2_vmpy <"vmpywouh", 0b011, 0b111, 0, 1, 1>;
+def M2_mmpyuh_rs1: T_M2_vmpy <"vmpywouh", 0b111, 0b111, 1, 1, 1>;
+
+//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyul_s0:  T_M2_vmpy <"vmpyweuh", 0b010, 0b101, 0, 0, 1>;
+def M2_mmpyul_s1:  T_M2_vmpy <"vmpyweuh", 0b110, 0b101, 1, 0, 1>;
+def M2_mmpyul_rs0: T_M2_vmpy <"vmpyweuh", 0b011, 0b101, 0, 1, 1>;
+def M2_mmpyul_rs1: T_M2_vmpy <"vmpyweuh", 0b111, 0b101, 1, 1, 1>;
+}

let hasNewValue = 1, opNewValue = 0 in
@@ -2265,6 +2667,9 @@ class T_MType_mpy <string mnemonic, bits<4> RegTyBits, RegisterClass RC,
  let Inst{4-0} = dst;
}

+class T_MType_vrcmpy <string mnemonic, bits<3> MajOp, bits<3> MinOp, bit isHi>
+  : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, 1, 1, "", 1, isHi>;
+
class T_MType_dd <string mnemonic, bits<3> MajOp, bits<3> MinOp,
                  bit isSat = 0, bit isRnd = 0 >
  : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, isSat, isRnd>;
@@ -2277,30 +2682,37 @@ class T_MType_rr2 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
                   bit isSat = 0, bit isRnd = 0, string op2str = "" >
  : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd, op2str>;

-let CextOpcode = "mpyi", InputType = "reg", isCodeGenOnly = 0 in
+def M2_vradduh    : T_MType_dd <"vradduh", 0b000, 0b001, 0, 0>;
+def M2_vdmpyrs_s0 : T_MType_dd <"vdmpy",   0b000, 0b000, 1, 1>;
+def M2_vdmpyrs_s1 : T_MType_dd <"vdmpy",   0b100, 0b000, 1, 1>;
+
+let CextOpcode = "mpyi", InputType = "reg" in
def M2_mpyi : T_MType_rr1 <"mpyi", 0b000, 0b000>, ImmRegRel;

-let isCodeGenOnly = 0 in {
def M2_mpy_up  : T_MType_rr1 <"mpy",  0b000, 0b001>;
def M2_mpyu_up : T_MType_rr1 <"mpyu", 0b010, 0b001>;
-}

-let isCodeGenOnly = 0 in
def M2_dpmpyss_rnd_s0 : T_MType_rr1 <"mpy", 0b001, 0b001, 0, 1>;

-let isCodeGenOnly = 0 in {
+def M2_vmpy2s_s0pack : T_MType_rr1 <"vmpyh", 0b001, 0b111, 1, 1>;
+def M2_vmpy2s_s1pack : T_MType_rr1 <"vmpyh", 0b101, 0b111, 1, 1>;
+
def M2_hmmpyh_rs1 : T_MType_rr2 <"mpy", 0b101, 0b100, 1, 1, ".h">;
1, ".h">; def M2_hmmpyl_rs1 : T_MType_rr2 <"mpy", 0b111, 0b100, 1, 1, ".l">; -} + +def M2_cmpyrs_s0 : T_MType_rr2 <"cmpy", 0b001, 0b110, 1, 1>; +def M2_cmpyrs_s1 : T_MType_rr2 <"cmpy", 0b101, 0b110, 1, 1>; +def M2_cmpyrsc_s0 : T_MType_rr2 <"cmpy", 0b011, 0b110, 1, 1, "*">; +def M2_cmpyrsc_s1 : T_MType_rr2 <"cmpy", 0b111, 0b110, 1, 1, "*">; // V4 Instructions -let isCodeGenOnly = 0 in { +def M2_vraddh : T_MType_dd <"vraddh", 0b001, 0b111, 0>; def M2_mpysu_up : T_MType_rr1 <"mpysu", 0b011, 0b001, 0>; +def M2_mpy_up_s1 : T_MType_rr1 <"mpy", 0b101, 0b010, 0>; def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>; def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">; def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">; -} def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>; def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>; @@ -2325,11 +2737,10 @@ class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern> let Inst{12-5} = u8; } -let isExtendable = 1, opExtentBits = 8, opExtendable = 2, isCodeGenOnly = 0 in +let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in def M2_mpysip : T_MType_mpy_ri <0, u8Ext, - [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u8ExtPred:$u8))]>; + [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32ImmPred:$u8))]>; -let isCodeGenOnly = 0 in def M2_mpysin : T_MType_mpy_ri <1, u8Imm, [(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs, u8ImmPred:$u8)))]>; @@ -2345,11 +2756,12 @@ def M2_mpyui : MInst<(outs IntRegs:$dst), // Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8) // depending on the value of m9. See Arch Spec. let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9, - CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1 in + CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1, + isAsmParserOnly = 1 in def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2), "$dst = mpyi($src1, #$src2)", [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - s9ExtPred:$src2))]>, ImmRegRel; + s32ImmPred:$src2))]>, ImmRegRel; let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3, InputType = "imm" in @@ -2397,10 +2809,10 @@ class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp, let Inst{4-0} = dst; } -let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in { +let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in { def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext, [(set (i32 IntRegs:$dst), - (add (mul IntRegs:$src2, u8ExtPred:$src3), + (add (mul IntRegs:$src2, u32ImmPred:$src3), IntRegs:$src1))]>, ImmRegRel; def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, @@ -2409,11 +2821,11 @@ let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in { IntRegs:$src1))]>, ImmRegRel; } -let CextOpcode = "ADD_acc", isCodeGenOnly = 0 in { +let CextOpcode = "ADD_acc" in { let isExtentSigned = 1 in def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext, [(set (i32 IntRegs:$dst), - (add (add (i32 IntRegs:$src2), s8_16ExtPred:$src3), + (add (add (i32 IntRegs:$src2), s16_16ImmPred:$src3), (i32 IntRegs:$src1)))]>, ImmRegRel; def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, @@ -2422,20 +2834,18 @@ let CextOpcode = "ADD_acc", isCodeGenOnly = 0 in { (i32 IntRegs:$src1)))]>, ImmRegRel; } -let CextOpcode = "SUB_acc", isCodeGenOnly = 0 in { +let CextOpcode = "SUB_acc" in { let isExtentSigned = 1 in def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8Ext>, ImmRegRel; def 
M2_nacci : T_MType_acc_rr <"-= add", 0b100, 0b001, 0>, ImmRegRel; } -let Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in +let Itinerary = M_tc_3x_SLOT23 in def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8Ext>; -let isCodeGenOnly = 0 in { def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>; def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>; -} class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp, PatLeaf ImmPred> @@ -2447,10 +2857,187 @@ class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp> (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>; -def : T_MType_acc_pat1 <M2_macsin, mul, sub, u8ExtPred>; +def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>; -def : T_MType_acc_pat1 <M2_naccii, add, sub, s8_16ExtPred>; +def : T_MType_acc_pat1 <M2_naccii, add, sub, s16_16ImmPred>; def : T_MType_acc_pat2 <M2_nacci, add, sub>; + +//===----------------------------------------------------------------------===// +// Template Class -- XType Vector Instructions +//===----------------------------------------------------------------------===// +class T_XTYPE_Vect < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj > + : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd = "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"), + [] > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_XTYPE_Vect_acc < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj > + : MInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd += "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"), + [], "$dst2 = $Rdd",M_tc_3x_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_XTYPE_Vect_diff < bits<3> MajOp, string opc > + : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rtt, DoubleRegs:$Rss), + "$Rdd = "#opc#"($Rtt, $Rss)", + [], "",M_tc_2_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +// Vector reduce add unsigned bytes: Rdd32=vrmpybu(Rss32,Rtt32) +def A2_vraddub: T_XTYPE_Vect <"vraddub", 0b010, 0b001, 0>; +def A2_vraddub_acc: T_XTYPE_Vect_acc <"vraddub", 0b010, 0b001, 0>; + +// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt) +def A2_vrsadub: T_XTYPE_Vect <"vrsadub", 0b010, 0b010, 0>; +def A2_vrsadub_acc: T_XTYPE_Vect_acc <"vrsadub", 0b010, 0b010, 0>; + +// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss) +def M2_vabsdiffh: T_XTYPE_Vect_diff<0b011, "vabsdiffh">; + +// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss) +def M2_vabsdiffw: T_XTYPE_Vect_diff<0b001, "vabsdiffw">; + +// Vector reduce complex multiply real or imaginary: +// Rdd[+]=vrcmpy[ir](Rss,Rtt[*]) +def M2_vrcmpyi_s0: T_XTYPE_Vect <"vrcmpyi", 0b000, 0b000, 0>; +def M2_vrcmpyi_s0c: T_XTYPE_Vect <"vrcmpyi", 0b010, 0b000, 1>; +def M2_vrcmaci_s0: T_XTYPE_Vect_acc <"vrcmpyi", 0b000, 0b000, 0>; +def M2_vrcmaci_s0c: T_XTYPE_Vect_acc <"vrcmpyi", 0b010, 0b000, 1>; + +def M2_vrcmpyr_s0: T_XTYPE_Vect <"vrcmpyr", 0b000, 
0b001, 0>;
+def M2_vrcmpyr_s0c: T_XTYPE_Vect <"vrcmpyr", 0b011, 0b001, 1>;
+def M2_vrcmacr_s0: T_XTYPE_Vect_acc <"vrcmpyr", 0b000, 0b001, 0>;
+def M2_vrcmacr_s0c: T_XTYPE_Vect_acc <"vrcmpyr", 0b011, 0b001, 1>;
+
+// Vector reduce halfwords:
+// Rdd[+]=vrmpyh(Rss,Rtt)
+def M2_vrmpy_s0: T_XTYPE_Vect <"vrmpyh", 0b000, 0b010, 0>;
+def M2_vrmac_s0: T_XTYPE_Vect_acc <"vrmpyh", 0b000, 0b010, 0>;
+
+//===----------------------------------------------------------------------===//
+// Template Class -- Vector Multiply with accumulation.
+// Used for complex multiply real or imaginary, dual multiply and even halfwords
+//===----------------------------------------------------------------------===//
+let Defs = [USR_OVF] in
+class T_M2_vmpy_acc_sat < string opc, bits<3> MajOp, bits<3> MinOp,
+                          bit hasShift, bit isRnd >
+  : MInst <(outs DoubleRegs:$Rxx),
+           (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+  "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
+                               #!if(isRnd,":rnd","")#":sat",
+  [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
+    bits<5> Rxx;
+    bits<5> Rss;
+    bits<5> Rtt;
+
+    let IClass = 0b1110;
+
+    let Inst{27-24} = 0b1010;
+    let Inst{23-21} = MajOp;
+    let Inst{7-5} = MinOp;
+    let Inst{4-0} = Rxx;
+    let Inst{20-16} = Rss;
+    let Inst{12-8} = Rtt;
+  }
+
+class T_M2_vmpy_acc < string opc, bits<3> MajOp, bits<3> MinOp,
+                      bit hasShift, bit isRnd >
+  : MInst <(outs DoubleRegs:$Rxx),
+           (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+  "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
+                               #!if(isRnd,":rnd",""),
+  [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
+    bits<5> Rxx;
+    bits<5> Rss;
+    bits<5> Rtt;
+
+    let IClass = 0b1110;
+
+    let Inst{27-24} = 0b1010;
+    let Inst{23-21} = MajOp;
+    let Inst{7-5} = MinOp;
+    let Inst{4-0} = Rxx;
+    let Inst{20-16} = Rss;
+    let Inst{12-8} = Rtt;
+  }
+
+// Vector multiply word by signed half with accumulation
+// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmacls_s1: T_M2_vmpy_acc_sat <"vmpyweh", 0b100, 0b101, 1, 0>;
+def M2_mmacls_s0: T_M2_vmpy_acc_sat <"vmpyweh", 0b000, 0b101, 0, 0>;
+def M2_mmacls_rs1: T_M2_vmpy_acc_sat <"vmpyweh", 0b101, 0b101, 1, 1>;
+def M2_mmacls_rs0: T_M2_vmpy_acc_sat <"vmpyweh", 0b001, 0b101, 0, 1>;
+
+def M2_mmachs_s1: T_M2_vmpy_acc_sat <"vmpywoh", 0b100, 0b111, 1, 0>;
+def M2_mmachs_s0: T_M2_vmpy_acc_sat <"vmpywoh", 0b000, 0b111, 0, 0>;
+def M2_mmachs_rs1: T_M2_vmpy_acc_sat <"vmpywoh", 0b101, 0b111, 1, 1>;
+def M2_mmachs_rs0: T_M2_vmpy_acc_sat <"vmpywoh", 0b001, 0b111, 0, 1>;
+
+// Vector multiply word by unsigned half with accumulation
+// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmaculs_s1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b110, 0b101, 1, 0>;
+def M2_mmaculs_s0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b010, 0b101, 0, 0>;
+def M2_mmaculs_rs1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b111, 0b101, 1, 1>;
+def M2_mmaculs_rs0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b011, 0b101, 0, 1>;
+
+def M2_mmacuhs_s1: T_M2_vmpy_acc_sat <"vmpywouh", 0b110, 0b111, 1, 0>;
+def M2_mmacuhs_s0: T_M2_vmpy_acc_sat <"vmpywouh", 0b010, 0b111, 0, 0>;
+def M2_mmacuhs_rs1: T_M2_vmpy_acc_sat <"vmpywouh", 0b111, 0b111, 1, 1>;
+def M2_mmacuhs_rs0: T_M2_vmpy_acc_sat <"vmpywouh", 0b011, 0b111, 0, 1>;
+
+// Vector multiply even halfwords with accumulation
+// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
+def M2_vmac2es: T_M2_vmpy_acc <"vmpyeh", 0b001, 0b010, 0, 0>;
+def M2_vmac2es_s1: T_M2_vmpy_acc_sat <"vmpyeh", 0b100, 0b110, 1, 0>;
+def M2_vmac2es_s0: T_M2_vmpy_acc_sat <"vmpyeh", 0b000, 0b110, 0, 0>;
+
+// Vector dual multiply with accumulation
+// Rxx+=vdmpy(Rss,Rtt)[:sat]
+def
M2_vdmacs_s1: T_M2_vmpy_acc_sat <"vdmpy", 0b100, 0b100, 1, 0>; +def M2_vdmacs_s0: T_M2_vmpy_acc_sat <"vdmpy", 0b000, 0b100, 0, 0>; + +// Vector complex multiply real or imaginary with accumulation +// Rxx+=vcmpy[ir](Rss,Rtt):sat +def M2_vcmac_s0_sat_r: T_M2_vmpy_acc_sat <"vcmpyr", 0b001, 0b100, 0, 0>; +def M2_vcmac_s0_sat_i: T_M2_vmpy_acc_sat <"vcmpyi", 0b010, 0b100, 0, 0>; + //===----------------------------------------------------------------------===// // Template Class -- Multiply signed/unsigned halfwords with and without // saturation and rounding @@ -2478,7 +3065,6 @@ class T_M2_mpyd < bits<2> LHbits, bit isRnd, bit hasShift, bit isUnsigned > let Inst{12-8} = Rt; } -let isCodeGenOnly = 0 in { def M2_mpyd_hh_s0: T_M2_mpyd<0b11, 0, 0, 0>; def M2_mpyd_hl_s0: T_M2_mpyd<0b10, 0, 0, 0>; def M2_mpyd_lh_s0: T_M2_mpyd<0b01, 0, 0, 0>; @@ -2509,7 +3095,7 @@ def M2_mpyud_hh_s1: T_M2_mpyd<0b11, 0, 1, 1>; def M2_mpyud_hl_s1: T_M2_mpyd<0b10, 0, 1, 1>; def M2_mpyud_lh_s1: T_M2_mpyd<0b01, 0, 1, 1>; def M2_mpyud_ll_s1: T_M2_mpyd<0b00, 0, 1, 1>; -} + //===----------------------------------------------------------------------===// // Template Class for xtype mpy: // Vector multiply @@ -2570,7 +3156,6 @@ class T_XTYPE_mpy64_acc <string op1, string op2, bits<3> MajOp, bits<3> MinOp, // MPY - Multiply and use full result // Rdd = mpy[u](Rs,Rt) -let isCodeGenOnly = 0 in { def M2_dpmpyss_s0 : T_XTYPE_mpy64 < "mpy", 0b000, 0b000, 0, 0, 0>; def M2_dpmpyuu_s0 : T_XTYPE_mpy64 < "mpyu", 0b010, 0b000, 0, 0, 0>; @@ -2579,7 +3164,48 @@ def M2_dpmpyss_acc_s0 : T_XTYPE_mpy64_acc < "mpy", "+", 0b000, 0b000, 0, 0, 0>; def M2_dpmpyss_nac_s0 : T_XTYPE_mpy64_acc < "mpy", "-", 0b001, 0b000, 0, 0, 0>; def M2_dpmpyuu_acc_s0 : T_XTYPE_mpy64_acc < "mpyu", "+", 0b010, 0b000, 0, 0, 0>; def M2_dpmpyuu_nac_s0 : T_XTYPE_mpy64_acc < "mpyu", "-", 0b011, 0b000, 0, 0, 0>; -} + +// Complex multiply real or imaginary +// Rxx=cmpy[ir](Rs,Rt) +def M2_cmpyi_s0 : T_XTYPE_mpy64 < "cmpyi", 0b000, 0b001, 0, 0, 0>; +def M2_cmpyr_s0 : T_XTYPE_mpy64 < "cmpyr", 0b000, 0b010, 0, 0, 0>; + +// Rxx+=cmpy[ir](Rs,Rt) +def M2_cmaci_s0 : T_XTYPE_mpy64_acc < "cmpyi", "+", 0b000, 0b001, 0, 0, 0>; +def M2_cmacr_s0 : T_XTYPE_mpy64_acc < "cmpyr", "+", 0b000, 0b010, 0, 0, 0>; + +// Complex multiply +// Rdd=cmpy(Rs,Rt)[:<<]:sat +def M2_cmpys_s0 : T_XTYPE_mpy64 < "cmpy", 0b000, 0b110, 1, 0, 0>; +def M2_cmpys_s1 : T_XTYPE_mpy64 < "cmpy", 0b100, 0b110, 1, 1, 0>; + +// Rdd=cmpy(Rs,Rt*)[:<<]:sat +def M2_cmpysc_s0 : T_XTYPE_mpy64 < "cmpy", 0b010, 0b110, 1, 0, 1>; +def M2_cmpysc_s1 : T_XTYPE_mpy64 < "cmpy", 0b110, 0b110, 1, 1, 1>; + +// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat +def M2_cmacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b000, 0b110, 1, 0, 0>; +def M2_cnacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b000, 0b111, 1, 0, 0>; +def M2_cmacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b100, 0b110, 1, 1, 0>; +def M2_cnacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b100, 0b111, 1, 1, 0>; + +// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat +def M2_cmacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b010, 0b110, 1, 0, 1>; +def M2_cnacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b010, 0b111, 1, 0, 1>; +def M2_cmacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b110, 0b110, 1, 1, 1>; +def M2_cnacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b110, 0b111, 1, 1, 1>; + +// Vector multiply halfwords +// Rdd=vmpyh(Rs,Rt)[:<<]:sat +//let Defs = [USR_OVF] in { + def M2_vmpy2s_s1 : T_XTYPE_mpy64 < "vmpyh", 0b100, 0b101, 1, 1, 0>; + def M2_vmpy2s_s0 : T_XTYPE_mpy64 < "vmpyh", 0b000, 0b101, 1, 0, 0>; +//} + +// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat] 
+def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>; +def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>; +def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>; def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))), (i64 (anyext (i32 IntRegs:$src2))))), @@ -2750,35 +3376,30 @@ multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC, } } -let accessSize = ByteAccess, isCodeGenOnly = 0 in +let accessSize = ByteAccess in defm storerb: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm, 0b1000>; -let accessSize = HalfWordAccess, isCodeGenOnly = 0 in +let accessSize = HalfWordAccess in defm storerh: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm, 0b1010>; -let accessSize = WordAccess, isCodeGenOnly = 0 in +let accessSize = WordAccess in defm storeri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm, 0b1100>; -let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in +let accessSize = DoubleWordAccess in defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>; -let accessSize = HalfWordAccess, isNVStorable = 0, isCodeGenOnly = 0 in +let accessSize = HalfWordAccess, isNVStorable = 0 in defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>; -def : Pat<(post_truncsti8 (i32 IntRegs:$src1), IntRegs:$src2, - s4_3ImmPred:$offset), - (S2_storerb_pi IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>; +class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, + InstHexagon MI> + : Pat<(Store Value:$src1, I32:$src2, Offset:$offset), + (MI I32:$src2, imm:$offset, Value:$src1)>; -def : Pat<(post_truncsti16 (i32 IntRegs:$src1), IntRegs:$src2, - s4_3ImmPred:$offset), - (S2_storerh_pi IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>; - -def : Pat<(post_store (i32 IntRegs:$src1), IntRegs:$src2, s4_2ImmPred:$offset), - (S2_storeri_pi IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>; - -def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2, - s4_3ImmPred:$offset), - (S2_storerd_pi IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>; +def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>; +def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>; +def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>; +def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>; //===----------------------------------------------------------------------===// // Template class for post increment stores with register offset. 
@@ -2805,14 +3426,13 @@ class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp, let Inst{7} = 0b0; } -let isCodeGenOnly = 0 in { def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>; def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>; def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>; def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>; def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>; -} + let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp, bits<3>MajOp, bit isH = 0> @@ -2906,7 +3526,7 @@ multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC, } } -let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in { +let addrMode = BaseImmOffset, InputType = "imm" in { let accessSize = ByteAccess in defm storerb: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext, u6_0Ext, 0b000>; @@ -2925,42 +3545,102 @@ let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in { u6_1Ext, 0b011, 1>; } -def : Pat<(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr), - (S2_storerb_io AddrFI:$addr, 0, (i32 IntRegs:$src1))>; - -def : Pat<(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr), - (S2_storerh_io AddrFI:$addr, 0, (i32 IntRegs:$src1))>; - -def : Pat<(store (i32 IntRegs:$src1), ADDRriS11_2:$addr), - (S2_storeri_io AddrFI:$addr, 0, (i32 IntRegs:$src1))>; - -def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr), - (S2_storerd_io AddrFI:$addr, 0, (i64 DoubleRegs:$src1))>; +// Patterns for generating stores, where the address takes different forms: +// - frameindex, +// - frameindex + offset, +// - base + offset, +// - simple (base address without offset). +// These would usually be used together (via Storex_pat defined below), but +// in some cases one may want to apply different properties (such as +// AddedComplexity) to the individual patterns. +class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; +class Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + InstHexagon MI> + : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; +class Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + InstHexagon MI> + : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; +class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), + (MI IntRegs:$Rs, 0, Value:$Rt)>; + +// Patterns for generating stores, where the address takes different forms, +// and where the value being stored is transformed through the value modifier +// ValueMod. The address forms are same as above. 
+class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$Rs, AddrFI:$fi), + (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; +class Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + PatFrag ValueMod, InstHexagon MI> + : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; +class Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + PatFrag ValueMod, InstHexagon MI> + : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; +class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), + (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>; + +multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, + InstHexagon MI> { + def: Storex_fi_pat <Store, Value, MI>; + def: Storex_fi_add_pat <Store, Value, ImmPred, MI>; + def: Storex_add_pat <Store, Value, ImmPred, MI>; +} + +multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, + PatFrag ValueMod, InstHexagon MI> { + def: Storexm_fi_pat <Store, Value, ValueMod, MI>; + def: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>; + def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>; +} + +// Regular stores in the DAG have two operands: value and address. +// Atomic stores also have two, but they are reversed: address, value. +// To use atomic stores with the patterns, they need to have their operands +// swapped. This relies on the knowledge that the F.Fragment uses names +// "ptr" and "val". +class SwapSt<PatFrag F> + : PatFrag<(ops node:$val, node:$ptr), F.Fragment>; +let AddedComplexity = 20 in { + defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>; -let AddedComplexity = 10 in { -def : Pat<(truncstorei8 (i32 IntRegs:$src1), (add IntRegs:$src2, - s11_0ExtPred:$offset)), - (S2_storerb_io IntRegs:$src2, s11_0ImmPred:$offset, - (i32 IntRegs:$src1))>; + defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>; +} -def : Pat<(truncstorei16 (i32 IntRegs:$src1), (add IntRegs:$src2, - s11_1ExtPred:$offset)), - (S2_storerh_io IntRegs:$src2, s11_1ImmPred:$offset, - (i32 IntRegs:$src1))>; +// Simple patterns should be tried with the least priority. 
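// [Editor's note: illustrative sketch, not part of the imported commit.]
// To make the multiclasses above concrete: assuming Storex_pat as defined
// here, a single line such as
//   defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
// expands to the frameindex, frameindex+offset, and base+offset patterns,
// the last of which reads
//   def: Pat<(store I32:$Rt, (add (i32 IntRegs:$Rs), s30_2ImmPred:$Off)),
//            (S2_storeri_io IntRegs:$Rs, imm:$Off, I32:$Rt)>;
// SwapSt lets the same machinery serve atomic stores, whose DAG fragment
// carries (ptr, val) rather than (val, ptr). The simple no-offset form is
// instantiated separately below so it can be tried with the least priority.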
+def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>; +def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>; +def: Storex_simple_pat<store, I32, S2_storeri_io>; +def: Storex_simple_pat<store, I64, S2_storerd_io>; -def : Pat<(store (i32 IntRegs:$src1), (add IntRegs:$src2, - s11_2ExtPred:$offset)), - (S2_storeri_io IntRegs:$src2, s11_2ImmPred:$offset, - (i32 IntRegs:$src1))>; +def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>; +def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>; +def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>; +def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>; -def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2, - s11_3ExtPred:$offset)), - (S2_storerd_io IntRegs:$src2, s11_3ImmPred:$offset, - (i64 DoubleRegs:$src1))>; +let AddedComplexity = 20 in { + defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>; + defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>; + defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>; } -// memh(Rx++#s4:1)=Rt.H +def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>; +def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>; +def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>; // Store predicate. let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, @@ -2971,7 +3651,7 @@ def STriw_pred : STInst<(outs), // S2_allocframe: Allocate stack frame. let Defs = [R29, R30], Uses = [R29, R31, R30], - hasSideEffects = 0, accessSize = DoubleWordAccess, isCodeGenOnly = 0 in + hasSideEffects = 0, accessSize = DoubleWordAccess in def S2_allocframe: ST0Inst < (outs), (ins u11_3Imm:$u11_3), "allocframe(#$u11_3)" > { @@ -3015,7 +3695,6 @@ class T_store_pci <string mnemonic, RegisterClass RC, let Inst{1} = 0b0; } -let isCodeGenOnly = 0 in { def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000, ByteAccess>; def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010, @@ -3026,12 +3705,64 @@ def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100, WordAccess>; def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110, DoubleWordAccess>; -} + +let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in +class T_storenew_pci <string mnemonic, Operand Imm, + bits<2>MajOp, MemAccessSize AlignSize> + : NVInst < (outs IntRegs:$_dst_), + (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, IntRegs:$Nt), + #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $Nt.new", + [], + "$Rz = $_dst_"> { + bits<5> Rz; + bits<6> offset; + bits<1> Mu; + bits<3> Nt; + + let accessSize = AlignSize; + + let IClass = 0b1010; + let Inst{27-21} = 0b1001101; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-11} = MajOp; + let Inst{10-8} = Nt; + let Inst{7} = 0b0; + let Inst{6-3} = + !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2}, + !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1}, + /* ByteAccess */ offset{3-0})); + let Inst{1} = 0b0; + } + +def S2_storerbnew_pci : T_storenew_pci <"memb", s4_0Imm, 0b00, ByteAccess>; +def S2_storerhnew_pci : T_storenew_pci <"memh", s4_1Imm, 0b01, HalfWordAccess>; +def S2_storerinew_pci : T_storenew_pci <"memw", s4_2Imm, 0b10, WordAccess>; + +//===----------------------------------------------------------------------===// +// Circular stores - Pseudo +// +// Please note that the input operand order in the pseudo instructions +// doesn't match with the real instructions. 
Pseudo instructions operand
+// order should mimic the ordering in the intrinsics.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_store_pci_pseudo <string opc, RegisterClass RC>
+  : STInstPI<(outs IntRegs:$_dst_),
+             (ins IntRegs:$src1, RC:$src2, IntRegs:$src3, s4Imm:$src4),
+             ".error \""#opc#"($src1++#$src4:circ($src3)) = $src2\"",
+             [], "$_dst_ = $src1">;
+
+def S2_storerb_pci_pseudo : T_store_pci_pseudo <"memb", IntRegs>;
+def S2_storerh_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>;
+def S2_storerf_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>;
+def S2_storeri_pci_pseudo : T_store_pci_pseudo <"memw", IntRegs>;
+def S2_storerd_pci_pseudo : T_store_pci_pseudo <"memd", DoubleRegs>;

//===----------------------------------------------------------------------===//
// Circular stores with auto-increment register
//===----------------------------------------------------------------------===//
-let Uses = [CS], isNVStorable = 1, isCodeGenOnly = 0 in
+let Uses = [CS], isNVStorable = 1 in
class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
                   MemAccessSize AlignSize, string RegSrc = "Rt">
  : STInst <(outs IntRegs:$_dst_),
@@ -3055,14 +3786,43 @@ class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
    let Inst{1} = 0b1;
  }

-let isCodeGenOnly = 0 in {
def S2_storerb_pcr : T_store_pcr<"memb", IntRegs, 0b1000, ByteAccess>;
def S2_storerh_pcr : T_store_pcr<"memh", IntRegs, 0b1010, HalfWordAccess>;
def S2_storeri_pcr : T_store_pcr<"memw", IntRegs, 0b1100, WordAccess>;
def S2_storerd_pcr : T_store_pcr<"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
def S2_storerf_pcr : T_store_pcr<"memh", IntRegs, 0b1011, HalfWordAccess, "Rt.h">;
-}
+
+//===----------------------------------------------------------------------===//
+// Circular .new stores with auto-increment register
+//===----------------------------------------------------------------------===//
+let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in
+class T_storenew_pcr <string mnemonic, bits<2>MajOp,
+                      MemAccessSize AlignSize>
+  : NVInst <(outs IntRegs:$_dst_),
+            (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt),
+            #mnemonic#"($Rz ++ I:circ($Mu)) = $Nt.new" ,
+            [] ,
+            "$Rz = $_dst_"> {
+    bits<5> Rz;
+    bits<1> Mu;
+    bits<3> Nt;
+
+    let accessSize = AlignSize;
+
+    let IClass = 0b1010;
+    let Inst{27-21} = 0b1001101;
+    let Inst{20-16} = Rz;
+    let Inst{13} = Mu;
+    let Inst{12-11} = MajOp;
+    let Inst{10-8} = Nt;
+    let Inst{7} = 0b0;
+    let Inst{1} = 0b1;
+  }
+
+def S2_storerbnew_pcr : T_storenew_pcr <"memb", 0b00, ByteAccess>;
+def S2_storerhnew_pcr : T_storenew_pcr <"memh", 0b01, HalfWordAccess>;
+def S2_storerinew_pcr : T_storenew_pcr <"memw", 0b10, WordAccess>;

//===----------------------------------------------------------------------===//
// Bit-reversed stores with auto-increment register
@@ -3093,7 +3853,7 @@ class T_store_pbr<string mnemonic, RegisterClass RC,
    let Inst{12-8} = src;
  }

-let isNVStorable = 1, isCodeGenOnly = 0 in {
+let isNVStorable = 1 in {
  let BaseOpcode = "S2_storerb_pbr" in
  def S2_storerb_pbr : T_store_pbr<"memb", IntRegs, ByteAccess,
                                   0b000>, NewValueRel;
@@ -3104,28 +3864,71 @@ let isNVStorable = 1, isCodeGenOnly = 0 in {
  def S2_storeri_pbr : T_store_pbr<"memw", IntRegs, WordAccess,
                                   0b100>, NewValueRel;
}
-let isCodeGenOnly = 0 in {
+
def S2_storerf_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, 0b011, 1>;
def S2_storerd_pbr : T_store_pbr<"memd", DoubleRegs,
                                 DoubleWordAccess, 0b110>;
-}

//===----------------------------------------------------------------------===//
-// ST -
+// Bit-reversed .new stores with auto-increment register
//===----------------------------------------------------------------------===//
+let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3,
+    hasSideEffects = 0 in
+class T_storenew_pbr<string mnemonic, MemAccessSize addrSize, bits<2> majOp>
+  : NVInst <(outs IntRegs:$_dst_),
+            (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt),
+            #mnemonic#"($Rz ++ $Mu:brev) = $Nt.new", [],
+            "$Rz = $_dst_">, NewValueRel {
+    let accessSize = addrSize;
+    bits<5> Rz;
+    bits<1> Mu;
+    bits<3> Nt;
+
+    let IClass = 0b1010;
+
+    let Inst{27-21} = 0b1111101;
+    let Inst{12-11} = majOp;
+    let Inst{7} = 0b0;
+    let Inst{20-16} = Rz;
+    let Inst{13} = Mu;
+    let Inst{10-8} = Nt;
+  }
+
+let BaseOpcode = "S2_storerb_pbr" in
+def S2_storerbnew_pbr : T_storenew_pbr<"memb", ByteAccess, 0b00>;
+
+let BaseOpcode = "S2_storerh_pbr" in
+def S2_storerhnew_pbr : T_storenew_pbr<"memh", HalfWordAccess, 0b01>;
+
+let BaseOpcode = "S2_storeri_pbr" in
+def S2_storerinew_pbr : T_storenew_pbr<"memw", WordAccess, 0b10>;

//===----------------------------------------------------------------------===//
-// STYPE/ALU +
+// Bit-reversed stores - Pseudo
+//
+// Please note that the input operand order in the pseudo instructions
+// doesn't match with the real instructions. Pseudo instructions operand
+// order should mimic the ordering in the intrinsics.
//===----------------------------------------------------------------------===//
-// Logical NOT.
-def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
-               "$dst = not($src1)",
-               [(set (i64 DoubleRegs:$dst), (not (i64 DoubleRegs:$src1)))]>;
+let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_store_pbr_pseudo <string opc, RegisterClass RC>
+  : STInstPI<(outs IntRegs:$_dst_),
+             (ins IntRegs:$src1, RC:$src2, IntRegs:$src3),
+             ".error \""#opc#"($src1++$src3:brev) = $src2\"",
+             [], "$_dst_ = $src1">;
+def S2_storerb_pbr_pseudo : T_store_pbr_pseudo <"memb", IntRegs>;
+def S2_storerh_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>;
+def S2_storeri_pbr_pseudo : T_store_pbr_pseudo <"memw", IntRegs>;
+def S2_storerf_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>;
+def S2_storerd_pbr_pseudo : T_store_pbr_pseudo <"memd", DoubleRegs>;

//===----------------------------------------------------------------------===//
-// STYPE/ALU -
+// ST -
//===----------------------------------------------------------------------===//

+//===----------------------------------------------------------------------===//
+// Template class for S_2op instructions.
+//===----------------------------------------------------------------------===// let hasSideEffects = 0 in class T_S2op_1 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut, RegisterClass RCIn, bits<2> MajOp, bits<3> MinOp, bit isSat> @@ -3156,26 +3959,62 @@ let hasNewValue = 1 in class T_S2op_1_ii <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0> : T_S2op_1 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, isSat>; +// Vector sign/zero extend +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def S2_vsxtbh : T_S2op_1_di <"vsxtbh", 0b00, 0b000>; + def S2_vsxthw : T_S2op_1_di <"vsxthw", 0b00, 0b100>; + def S2_vzxtbh : T_S2op_1_di <"vzxtbh", 0b00, 0b010>; + def S2_vzxthw : T_S2op_1_di <"vzxthw", 0b00, 0b110>; +} + +// Vector splat bytes/halfwords +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def S2_vsplatrb : T_S2op_1_ii <"vsplatb", 0b01, 0b111>; + def S2_vsplatrh : T_S2op_1_di <"vsplath", 0b01, 0b010>; +} + // Sign extend word to doubleword -let isCodeGenOnly = 0 in def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>; def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>; +// Vector saturate and pack +let Defs = [USR_OVF] in { + def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>; + def S2_svsathub : T_S2op_1_ii <"vsathub", 0b10, 0b010>; + def S2_vsathb : T_S2op_1_id <"vsathb", 0b00, 0b110>; + def S2_vsathub : T_S2op_1_id <"vsathub", 0b00, 0b000>; + def S2_vsatwh : T_S2op_1_id <"vsatwh", 0b00, 0b010>; + def S2_vsatwuh : T_S2op_1_id <"vsatwuh", 0b00, 0b100>; +} + +// Vector truncate +def S2_vtrunohb : T_S2op_1_id <"vtrunohb", 0b10, 0b000>; +def S2_vtrunehb : T_S2op_1_id <"vtrunehb", 0b10, 0b010>; + // Swizzle the bytes of a word -let isCodeGenOnly = 0 in def A2_swiz : T_S2op_1_ii <"swiz", 0b10, 0b111>; // Saturate -let Defs = [USR_OVF], isCodeGenOnly = 0 in { +let Defs = [USR_OVF] in { def A2_sat : T_S2op_1_id <"sat", 0b11, 0b000>; def A2_satb : T_S2op_1_ii <"satb", 0b11, 0b111>; def A2_satub : T_S2op_1_ii <"satub", 0b11, 0b110>; def A2_sath : T_S2op_1_ii <"sath", 0b11, 0b100>; def A2_satuh : T_S2op_1_ii <"satuh", 0b11, 0b101>; + def A2_roundsat : T_S2op_1_id <"round", 0b11, 0b001, 0b1>; } -let Itinerary = S_2op_tc_2_SLOT23, isCodeGenOnly = 0 in { +let Itinerary = S_2op_tc_2_SLOT23 in { + // Vector round and pack + def S2_vrndpackwh : T_S2op_1_id <"vrndwh", 0b10, 0b100>; + + let Defs = [USR_OVF] in + def S2_vrndpackwhs : T_S2op_1_id <"vrndwh", 0b10, 0b110, 1>; + + // Bit reverse + def S2_brev : T_S2op_1_ii <"brev", 0b01, 0b110>; + // Absolute value word def A2_abs : T_S2op_1_ii <"abs", 0b10, 0b100>; @@ -3227,7 +4066,7 @@ class T_S2op_2_di <string mnemonic, bits<3> MajOp, bits<3> MinOp> let hasNewValue = 1 in class T_S2op_2_id <string mnemonic, bits<3> MajOp, bits<3> MinOp> : T_S2op_2 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, 0, 0>; - + let hasNewValue = 1 in class T_S2op_2_ii <string mnemonic, bits<3> MajOp, bits<3> MinOp, bit isSat = 0, bit isRnd = 0, list<dag> pattern = []> @@ -3239,21 +4078,33 @@ class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd> [(set (i32 IntRegs:$dst), (OpNd (i32 IntRegs:$src), (u5ImmPred:$u5)))]>; +// Vector arithmetic shift right by immediate with truncate and pack +def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>; + // Arithmetic/logical shift right/left by immediate -let Itinerary = S_2op_tc_1_SLOT23, isCodeGenOnly = 0 in { +let Itinerary = S_2op_tc_1_SLOT23 in { def S2_asr_i_r : T_S2op_shift <"asr", 0b000, 0b000, sra>; def S2_lsr_i_r : T_S2op_shift <"lsr", 0b000, 0b001, srl>; def 
S2_asl_i_r : T_S2op_shift <"asl", 0b000, 0b010, shl>;
}

// Shift left by immediate with saturation
-let Defs = [USR_OVF], isCodeGenOnly = 0 in
+let Defs = [USR_OVF] in
def S2_asl_i_r_sat : T_S2op_2_ii <"asl", 0b010, 0b010, 1>;

// Shift right with round
-let isCodeGenOnly = 0 in
def S2_asr_i_r_rnd : T_S2op_2_ii <"asr", 0b010, 0b000, 0, 1>;

+let isAsmParserOnly = 1 in
+def S2_asr_i_r_rnd_goodsyntax
+  : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5Imm:$u5),
+           "$dst = asrrnd($src, #$u5)",
+           [], "", S_2op_tc_1_SLOT23>;
+
+let isAsmParserOnly = 1 in
+def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src),
+            "$dst = not($src)">;
+
def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5ImmPred:$src2)),
                             (i32 1))),
                   (i32 1))),
@@ -3272,17 +4123,34 @@ class T_S2op_3<string opc, bits<2>MajOp, bits<3>minOp, bits<1> sat = 0>
    let Inst{4-0} = Rdd;
  }

-let isCodeGenOnly = 0 in {
def A2_absp : T_S2op_3 <"abs", 0b10, 0b110>;
def A2_negp : T_S2op_3 <"neg", 0b10, 0b101>;
def A2_notp : T_S2op_3 <"not", 0b10, 0b100>;
-}

// Interleave/deinterleave
-let isCodeGenOnly = 0 in {
def S2_interleave : T_S2op_3 <"interleave", 0b11, 0b101>;
def S2_deinterleave : T_S2op_3 <"deinterleave", 0b11, 0b100>;
-}
+
+// Vector Complex conjugate
+def A2_vconj : T_S2op_3 <"vconj", 0b10, 0b111, 1>;
+
+// Vector saturate without pack
+def S2_vsathb_nopack : T_S2op_3 <"vsathb", 0b00, 0b111>;
+def S2_vsathub_nopack : T_S2op_3 <"vsathub", 0b00, 0b100>;
+def S2_vsatwh_nopack : T_S2op_3 <"vsatwh", 0b00, 0b110>;
+def S2_vsatwuh_nopack : T_S2op_3 <"vsatwuh", 0b00, 0b101>;
+
+// Vector absolute value halfwords with and without saturation
+// Rdd64=vabsh(Rss64)[:sat]
+def A2_vabsh : T_S2op_3 <"vabsh", 0b01, 0b100>;
+def A2_vabshsat : T_S2op_3 <"vabsh", 0b01, 0b101, 1>;
+
+// Vector absolute value words with and without saturation
+def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>;
+def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>;
+
+def : Pat<(not (i64 DoubleRegs:$src1)),
+          (A2_notp DoubleRegs:$src1)>;

//===----------------------------------------------------------------------===//
// STYPE/BIT +
@@ -3313,7 +4181,6 @@ class T_COUNT_LEADING_64<string MnOp, bits<3> MajOp, bits<3> MinOp>
  : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b0,
                    (outs IntRegs:$Rd), (ins DoubleRegs:$Rs)>;

-let isCodeGenOnly = 0 in {
def S2_cl0 : T_COUNT_LEADING_32<"cl0", 0b000, 0b101>;
def S2_cl1 : T_COUNT_LEADING_32<"cl1", 0b000, 0b110>;
def S2_ct0 : T_COUNT_LEADING_32<"ct0", 0b010, 0b100>;
@@ -3323,14 +4190,28 @@ def S2_cl1p : T_COUNT_LEADING_64<"cl1", 0b010, 0b100>;
def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>;
def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>;
def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>;
-}

-def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
-def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
-def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
-def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
-def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
+// Count leading zeros.
+def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
+def: Pat<(i32 (ctlz_zero_undef I32:$Rs)), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz_zero_undef I64:$Rss))), (S2_cl0p I64:$Rss)>;
+
+// Count trailing zeros: 32-bit.
+def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
+def: Pat<(i32 (cttz_zero_undef I32:$Rs)), (S2_ct0 I32:$Rs)>;
+
+// Count leading ones.
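// [Editor's note: illustrative sketch, not part of the imported commit.]
// The DAG has no count-leading-ones node, so the patterns below match ctlz
// of the complemented value instead. A quick worked example for a 32-bit Rs:
//   Rs      = 0xFFFF0000   (16 leading ones)
//   not(Rs) = 0x0000FFFF   (16 leading zeros)
//   ctlz(not(Rs)) = 16, which is exactly cl1(Rs).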
+def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>; def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; +def: Pat<(i32 (ctlz_zero_undef (not I32:$Rs))), (S2_cl1 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz_zero_undef (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; + +// Count trailing ones: 32-bit. +def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>; +def: Pat<(i32 (cttz_zero_undef (not I32:$Rs))), (S2_ct1 I32:$Rs)>; + +// The 64-bit counts leading/trailing are defined in HexagonInstrInfoV4.td. // Bit set/clear/toggle @@ -3365,14 +4246,12 @@ class T_SCT_BIT_REG<string MnOp, bits<2> MinOp> let Inst{4-0} = Rd; } -let isCodeGenOnly = 0 in { def S2_clrbit_i : T_SCT_BIT_IMM<"clrbit", 0b001>; def S2_setbit_i : T_SCT_BIT_IMM<"setbit", 0b000>; def S2_togglebit_i : T_SCT_BIT_IMM<"togglebit", 0b010>; def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>; def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>; def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>; -} def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5ImmPred:$u5)))), (S2_clrbit_i IntRegs:$Rs, u5ImmPred:$u5)>; @@ -3422,10 +4301,8 @@ class T_TEST_BIT_REG<string MnOp, bit IsNeg> let Inst{1-0} = Pd; } -let isCodeGenOnly = 0 in { def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>; def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>; -} let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. def: Pat<(i1 (setne (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), @@ -3437,6 +4314,7 @@ let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))), (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; } + let hasSideEffects = 0 in class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg> : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6Imm:$u6), @@ -3471,11 +4349,9 @@ class T_TEST_BITS_REG<string MnOp, bits<2> MajOp, bit IsNeg> let Inst{1-0} = Pd; } -let isCodeGenOnly = 0 in { def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>; def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>; def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>; -} let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6ImmPred:$u6), 0)), @@ -3503,6 +4379,14 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)), // XTYPE/PERM + //===----------------------------------------------------------------------===// +def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))), + (i32 8)), + (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))), + (i32 16)), + (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))), + (zextloadi8 (i32 IntRegs:$b))), + (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; + //===----------------------------------------------------------------------===// // XTYPE/PERM - //===----------------------------------------------------------------------===// @@ -3512,7 +4396,7 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)), //===----------------------------------------------------------------------===// // Predicate transfer. -let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in +let hasSideEffects = 0, hasNewValue = 1 in def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps), "$Rd = $Ps", [], "", S_2op_tc_1_SLOT23> { bits<5> Rd; @@ -3526,7 +4410,7 @@ def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps), } // Transfer general register to predicate. 
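// [Editor's note: illustrative sketch, not part of the imported commit.]
// C2_tfrpr above and C2_tfrrp below are how i1 values cross between the
// predicate and general register files. The i1-load patterns that follow
// rebuild a predicate from a loaded byte, e.g.
//   def: Pat<(i1 (load (i32 IntRegs:$Rs))),
//            (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
// while the I1toI32 fragment, (C2_muxii $Ps, 1, 0), goes the other way so
// that an i1 can be stored to memory as a byte.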
-let hasSideEffects = 0, isCodeGenOnly = 0 in +let hasSideEffects = 0 in def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs), "$Pd = $Rs", [], "", S_2op_tc_2early_SLOT23> { bits<2> Pd; @@ -3538,6 +4422,27 @@ def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs), let Inst{1-0} = Pd; } +let hasSideEffects = 0, isCodeGenOnly = 1 in +def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src), + "$dst = $src">; + + +// Patterns for loads of i1: +def: Pat<(i1 (load AddrFI:$fi)), + (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; +def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32ImmPred:$Off))), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; +def: Pat<(i1 (load (i32 IntRegs:$Rs))), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; + +def I1toI32: OutPatFrag<(ops node:$Rs), + (C2_muxii (i1 $Rs), 1, 0)>; + +def I32toI1: OutPatFrag<(ops node:$Rs), + (i1 (C2_tfrrp (i32 $Rs)))>; + +defm: Storexm_pat<store, I1, s32ImmPred, I1toI32, S2_storerb_io>; +def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>; //===----------------------------------------------------------------------===// // STYPE/PRED - @@ -3570,15 +4475,12 @@ class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3>MinOp> } // Shift by immediate. -let isCodeGenOnly = 0 in { def S2_asr_i_p : S_2OpInstImmI6<"asr", sra, 0b000>; def S2_asl_i_p : S_2OpInstImmI6<"asl", shl, 0b010>; def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>; -} // Shift left by small amount and add. -let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0, - isCodeGenOnly = 0 in +let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in def S2_addasl_rrri: SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs, u3Imm:$u3), "$Rd = addasl($Rt, $Rs, #$u3)" , @@ -3627,8 +4529,8 @@ def S2_addasl_rrri: SInst <(outs IntRegs:$Rd), //===----------------------------------------------------------------------===// def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; -let hasSideEffects = 1, isSoloAX = 1, isCodeGenOnly = 0 in -def BARRIER : SYSInst<(outs), (ins), +let hasSideEffects = 1, isSoloAX = 1 in +def Y2_barrier : SYSInst<(outs), (ins), "barrier", [(HexagonBARRIER)],"",ST_tc_st_SLOT0> { let Inst{31-28} = 0b1010; @@ -3638,6 +4540,20 @@ def BARRIER : SYSInst<(outs), (ins), //===----------------------------------------------------------------------===// // SYSTEM/SUPER - //===----------------------------------------------------------------------===// + +// Generate frameindex addresses. The main reason for the offset operand is +// that every instruction that is allowed to have frame index as an operand +// will then have that operand followed by an immediate operand (the offset). +// This simplifies the frame-index elimination code. +// +let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, + isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in { + def TFR_FI : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$fi, s32Imm:$off), "">; + def TFR_FIA : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$fi, s32Imm:$off), "">; +} + //===----------------------------------------------------------------------===// // CRUSER - Type. 
//===----------------------------------------------------------------------===// @@ -3683,14 +4599,19 @@ class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0> multiclass LOOP_ri<string mnemonic> { def i : LOOP_iBase<mnemonic, brtarget>; def r : LOOP_rBase<mnemonic, brtarget>; + + let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in { + def iext: LOOP_iBase<mnemonic, brtargetExt, 1>; + def rext: LOOP_rBase<mnemonic, brtargetExt, 1>; + } } -let Defs = [SA0, LC0, USR], isCodeGenOnly = 0 in +let Defs = [SA0, LC0, USR] in defm J2_loop0 : LOOP_ri<"loop0">; // Interestingly only loop0's appear to set usr.lpcfg -let Defs = [SA1, LC1], isCodeGenOnly = 0 in +let Defs = [SA1, LC1] in defm J2_loop1 : LOOP_ri<"loop1">; let isBranch = 1, isTerminator = 1, hasSideEffects = 0, @@ -3751,12 +4672,40 @@ multiclass SPLOOP_ri<string mnemonic, bits<2> op> { def r : SPLOOP_rBase<mnemonic, op>; } -let isCodeGenOnly = 0 in { defm J2_ploop1s : SPLOOP_ri<"1", 0b01>; defm J2_ploop2s : SPLOOP_ri<"2", 0b10>; defm J2_ploop3s : SPLOOP_ri<"3", 0b11>; + +// if (Rs[!>=<]=#0) jump:[t/nt] +let Defs = [PC], isPredicated = 1, isBranch = 1, hasSideEffects = 0, + hasSideEffects = 0 in +class J2_jump_0_Base<string compare, bit isTak, bits<2> op> + : CRInst <(outs), (ins IntRegs:$Rs, brtarget:$r13_2), + "if ($Rs"#compare#"#0) jump"#!if(isTak, ":t", ":nt")#" $r13_2" > { + bits<5> Rs; + bits<15> r13_2; + + let IClass = 0b0110; + + let Inst{27-24} = 0b0001; + let Inst{23-22} = op; + let Inst{12} = isTak; + let Inst{21} = r13_2{14}; + let Inst{20-16} = Rs; + let Inst{11-1} = r13_2{12-2}; + let Inst{13} = r13_2{13}; + } + +multiclass J2_jump_compare_0<string compare, bits<2> op> { + def NAME : J2_jump_0_Base<compare, 0, op>; + def NAME#pt : J2_jump_0_Base<compare, 1, op>; } +defm J2_jumprz : J2_jump_compare_0<"!=", 0b00>; +defm J2_jumprgtez : J2_jump_compare_0<">=", 0b01>; +defm J2_jumprnz : J2_jump_compare_0<"==", 0b10>; +defm J2_jumprltez : J2_jump_compare_0<"<=", 0b11>; + // Transfer to/from Control/GPR Guest/GPR let hasSideEffects = 0 in class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble> @@ -3773,8 +4722,9 @@ class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble> let Inst{20-16} = src; let Inst{4-0} = dst; } -let isCodeGenOnly = 0 in + def A2_tfrrcr : TFR_CR_RS_base<CtrRegs, IntRegs, 0b0>; +def A4_tfrpcp : TFR_CR_RS_base<CtrRegs64, DoubleRegs, 0b1>; def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>; def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>; @@ -3794,13 +4744,14 @@ class TFR_RD_CR_base<RegisterClass RC, RegisterClass CTRC, bit isSingle> let Inst{4-0} = dst; } -let hasNewValue = 1, opNewValue = 0, isCodeGenOnly = 0 in +let hasNewValue = 1, opNewValue = 0 in def A2_tfrcrr : TFR_RD_CR_base<IntRegs, CtrRegs, 1>; +def A4_tfrcpp : TFR_RD_CR_base<DoubleRegs, CtrRegs64, 0>; def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>; def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>; // Y4_trace: Send value to etm trace. 
-let isSoloAX = 1, hasSideEffects = 0, isCodeGenOnly = 0 in +let isSoloAX = 1, hasSideEffects = 0 in def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs), "trace($Rs)"> { bits<5> Rs; @@ -3810,350 +4761,201 @@ def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs), let Inst{20-16} = Rs; } -let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3), - "Error; should not emit", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2), - s12ImmPred:$src3)))]>; - -let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3), - "Error; should not emit", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2, - (i32 IntRegs:$src3))))]>; - -let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3), - "Error; should not emit", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2, - s12ImmPred:$src3)))]>; - -// Generate frameindex addresses. -let isReMaterializable = 1 in -def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1), - "$dst = add($src1)", - [(set (i32 IntRegs:$dst), ADDRri:$src1)]>; - // Support for generating global address. // Taken from X86InstrInfo.td. -def SDTHexagonCONST32 : SDTypeProfile<1, 1, [ - SDTCisVT<0, i32>, - SDTCisVT<1, i32>, - SDTCisPtrTy<0>]>; -def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; -def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; +def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisPtrTy<0>]>; +def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; +def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; // HI/LO Instructions -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in -def LO : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst.l = #LO($global)", - []>; - -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in -def HI : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst.h = #HI($global)", - []>; +let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, + hasNewValue = 1, opNewValue = 0 in +class REG_IMMED<string RegHalf, string Op, bit Rs, bits<3> MajOp, bit MinOp> + : ALU32_ri<(outs IntRegs:$dst), + (ins i32imm:$imm_value), + "$dst"#RegHalf#" = #"#Op#"($imm_value)", []> { + bits<5> dst; + bits<32> imm_value; + let IClass = 0b0111; -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in -def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value), - "$dst.l = #LO($imm_value)", - []>; + let Inst{27} = Rs; + let Inst{26-24} = MajOp; + let Inst{21} = MinOp; + let Inst{20-16} = dst; + let Inst{23-22} = !if (!eq(Op, "LO"), imm_value{15-14}, imm_value{31-30}); + let Inst{13-0} = !if (!eq(Op, "LO"), imm_value{13-0}, imm_value{29-16}); +} +let isAsmParserOnly = 1 in { + def LO : REG_IMMED<".l", "LO", 0b0, 0b001, 0b1>; + def LO_H : REG_IMMED<".l", "HI", 0b0, 0b001, 0b1>; + def HI : REG_IMMED<".h", "HI", 0b0, 0b010, 0b1>; + def HI_L : REG_IMMED<".h", "LO", 0b0, 0b010, 0b1>; +} -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in -def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value), - "$dst.h = #HI($imm_value)", - []>; +let isMoveImm = 1, isCodeGenOnly = 1 in +def LO_PIC : ALU32_ri<(outs 
IntRegs:$dst), (ins bblabel:$label), + "$dst.l = #LO($label@GOTREL)", + []>; -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in -def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt), - "$dst.l = #LO($jt)", - []>; +let isMoveImm = 1, isCodeGenOnly = 1 in +def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst.h = #HI($label@GOTREL)", + []>; -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in -def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt), - "$dst.h = #HI($jt)", - []>; +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def HI_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.h = #HI($global@GOT)", + []>; +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def LO_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.l = #LO($global@GOT)", + []>; -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in -def LO_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), - "$dst.l = #LO($label)", - []>; +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def HI_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.h = #HI($global@GOTREL)", + []>; -let isReMaterializable = 1, isMoveImm = 1 , hasSideEffects = 0 in -def HI_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), - "$dst.h = #HI($label)", - []>; +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def LO_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.l = #LO($global@GOTREL)", + []>; // This pattern is incorrect. When we add small data, we should change // this pattern to use memw(#foo). // This is for sdata. -let isMoveImm = 1 in -def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), +let isMoveImm = 1, isAsmParserOnly = 1 in +def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst = CONST32(#$global)", [(set (i32 IntRegs:$dst), (load (HexagonCONST32 tglobaltlsaddr:$global)))]>; -// This is for non-sdata. 
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
-                 "$dst = CONST32(#$global)",
-                 [(set (i32 IntRegs:$dst),
-                       (HexagonCONST32 tglobaladdr:$global))]>;
-
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_set_jt : LDInst2<(outs IntRegs:$dst), (ins jumptablebase:$jt),
-                    "$dst = CONST32(#$jt)",
-                    [(set (i32 IntRegs:$dst),
-                          (HexagonCONST32 tjumptable:$jt))]>;
-
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32GP_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
-                   "$dst = CONST32(#$global)",
-                   [(set (i32 IntRegs:$dst),
-                         (HexagonCONST32_GP tglobaladdr:$global))]>;
-
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global),
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global),
                       "$dst = CONST32(#$global)",
                       [(set (i32 IntRegs:$dst), imm:$global) ]>;

-// Map BlockAddress lowering to CONST32_Int_Real
-def : Pat<(HexagonCONST32_GP tblockaddress:$addr),
-          (CONST32_Int_Real tblockaddress:$addr)>;
+// Map TLS addresses to a CONST32 instruction
+def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16Ext:$addr)>;
+def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16Ext:$label)>;

-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label),
-                   "$dst = CONST32($label)",
-                   [(set (i32 IntRegs:$dst), (HexagonCONST32 bbl:$label))]>;
-
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST64_Int_Real : LDInst2<(outs DoubleRegs:$dst), (ins i64imm:$global),
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST64_Int_Real : CONSTLDInst<(outs DoubleRegs:$dst), (ins i64imm:$global),
                       "$dst = CONST64(#$global)",
-                      [(set (i64 DoubleRegs:$dst), imm:$global) ]>;
+                      [(set (i64 DoubleRegs:$dst), imm:$global)]>;

-def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins),
-                  "$dst = xor($dst, $dst)",
-                  [(set (i1 PredRegs:$dst), 0)]>;
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+    isCodeGenOnly = 1 in
+def TFR_PdTrue : SInst<(outs PredRegs:$dst), (ins), "",
+                 [(set (i1 PredRegs:$dst), 1)]>;

-def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
-                 "$dst = mpy($src1, $src2)",
-                 [(set (i32 IntRegs:$dst),
-                       (trunc (i64 (srl (i64 (mul (i64 (sext (i32 IntRegs:$src1))),
-                                                  (i64 (sext (i32 IntRegs:$src2))))),
-                                        (i32 32)))))]>;
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+    isCodeGenOnly = 1 in
+def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), "$dst = xor($dst, $dst)",
+                  [(set (i1 PredRegs:$dst), 0)]>;

// Pseudo instructions.
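// [Editor's note: illustrative sketch, not part of the imported commit.]
// The callseq_start/callseq_end nodes defined below bracket every call in
// the SelectionDAG and select to the ADJCALLSTACKDOWN/ADJCALLSTACKUP
// pseudos, which frame lowering later removes or folds into the real stack
// adjustment. Schematically, a call needing 16 bytes of outgoing argument
// space becomes:
//   ADJCALLSTACKDOWN 16        (from (callseq_start 16))
//   ...argument stores and the call itself...
//   ADJCALLSTACKUP 16, 0       (from (callseq_end 16, 0))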
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; - -def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, +def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; -def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; - def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; - -def call : SDNode<"HexagonISD::CALL", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; // For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain, // Optional Flag and Variable Arguments. // Its 1 Operand has pointer type. -def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -let Defs = [R29, R30], Uses = [R31, R30, R29] in { - def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - "Should never be emitted", - [(callseq_start timm:$amt)]>; -} +let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), + ".error \"should not emit\" ", + [(callseq_start timm:$amt)]>; -let Defs = [R29, R30, R31], Uses = [R29] in { - def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - "Should never be emitted", - [(callseq_end timm:$amt1, timm:$amt2)]>; -} -// Call subroutine. -let isCall = 1, hasSideEffects = 0, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def CALL : JInst<(outs), (ins calltarget:$dst), - "call $dst", []>; -} +let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in +def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + ".error \"should not emit\" ", + [(callseq_end timm:$amt1, timm:$amt2)]>; // Call subroutine indirectly. -let Defs = VolatileV3.Regs, isCodeGenOnly = 0 in +let Defs = VolatileV3.Regs in def J2_callr : JUMPR_MISC_CALLR<0, 1>; // Indirect tail-call. -let isCodeGenOnly = 1, isCall = 1, isReturn = 1 in -def TCRETURNR : T_JMPr; +let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, + isTerminator = 1, isCodeGenOnly = 1 in +def TCRETURNr : T_JMPr; // Direct tail-calls. -let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, -isTerminator = 1, isCodeGenOnly = 1 in { - def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst", - [], "", J_tc_2early_SLOT23>; - def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst", - [], "", J_tc_2early_SLOT23>; -} - -// Map call instruction. -def : Pat<(call (i32 IntRegs:$dst)), - (J2_callr (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>; -def : Pat<(call tglobaladdr:$dst), - (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>; -def : Pat<(call texternalsym:$dst), - (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>; -//Tail calls. 
-def : Pat<(HexagonTCRet tglobaladdr:$dst),
-          (TCRETURNtg tglobaladdr:$dst)>;
-def : Pat<(HexagonTCRet texternalsym:$dst),
-          (TCRETURNtext texternalsym:$dst)>;
-def : Pat<(HexagonTCRet (i32 IntRegs:$dst)),
-          (TCRETURNR (i32 IntRegs:$dst))>;
-
-// Atomic load and store support
-// 8 bit atomic load
-def : Pat<(atomic_load_8 ADDRriS11_0:$src1),
-          (i32 (L2_loadrub_io AddrFI:$src1, 0))>;
-
-def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)),
-          (i32 (L2_loadrub_io (i32 IntRegs:$src1), s11_0ImmPred:$offset))>;
-
-// 16 bit atomic load
-def : Pat<(atomic_load_16 ADDRriS11_1:$src1),
-          (i32 (L2_loadruh_io AddrFI:$src1, 0))>;
-
-def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)),
-          (i32 (L2_loadruh_io (i32 IntRegs:$src1), s11_1ImmPred:$offset))>;
-
-def : Pat<(atomic_load_32 ADDRriS11_2:$src1),
-          (i32 (L2_loadri_io AddrFI:$src1, 0))>;
-
-def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)),
-          (i32 (L2_loadri_io (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
-
-// 64 bit atomic load
-def : Pat<(atomic_load_64 ADDRriS11_3:$src1),
-          (i64 (L2_loadrd_io AddrFI:$src1, 0))>;
-
-def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)),
-          (i64 (L2_loadrd_io (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
-
-
-def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)),
-          (S2_storerb_io AddrFI:$src2, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset),
-                          (i32 IntRegs:$src1)),
-          (S2_storerb_io (i32 IntRegs:$src2), s11_0ImmPred:$offset,
-                         (i32 IntRegs:$src1))>;
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+    isTerminator = 1, isCodeGenOnly = 1 in
+def TCRETURNi : JInst<(outs), (ins calltarget:$dst), "", []>;
-
-def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)),
-          (S2_storerh_io AddrFI:$src2, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(atomic_store_16 (i32 IntRegs:$src1),
-                           (add (i32 IntRegs:$src2), s11_1ImmPred:$offset)),
-          (S2_storerh_io (i32 IntRegs:$src2), s11_1ImmPred:$offset,
-                         (i32 IntRegs:$src1))>;
-
-def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)),
-          (S2_storeri_io AddrFI:$src2, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(atomic_store_32 (add (i32 IntRegs:$src2), s11_2ImmPred:$offset),
-                           (i32 IntRegs:$src1)),
-          (S2_storeri_io (i32 IntRegs:$src2), s11_2ImmPred:$offset,
-                         (i32 IntRegs:$src1))>;
-
-
-
-
-def : Pat<(atomic_store_64 ADDRriS11_3:$src2, (i64 DoubleRegs:$src1)),
-          (S2_storerd_io AddrFI:$src2, 0, (i64 DoubleRegs:$src1))>;
-
-def : Pat<(atomic_store_64 (add (i32 IntRegs:$src2), s11_3ImmPred:$offset),
-                           (i64 DoubleRegs:$src1)),
-          (S2_storerd_io (i32 IntRegs:$src2), s11_3ImmPred:$offset,
-                         (i64 DoubleRegs:$src1))>;
+//Tail calls.
+def: Pat<(HexagonTCRet tglobaladdr:$dst),
+         (TCRETURNi tglobaladdr:$dst)>;
+def: Pat<(HexagonTCRet texternalsym:$dst),
+         (TCRETURNi texternalsym:$dst)>;
+def: Pat<(HexagonTCRet (i32 IntRegs:$dst)),
+         (TCRETURNr IntRegs:$dst)>;

// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
-def : Pat <(and (i32 IntRegs:$src1), 65535),
-           (A2_zxth (i32 IntRegs:$src1))>;
+def: Pat<(and (i32 IntRegs:$src1), 65535),
+        (A2_zxth IntRegs:$src1)>;

// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
-def : Pat <(and (i32 IntRegs:$src1), 255),
-           (A2_zxtb (i32 IntRegs:$src1))>;
+def: Pat<(and (i32 IntRegs:$src1), 255),
+        (A2_zxtb IntRegs:$src1)>;

// Map Add(p1, true) to p1 = not(p1).
// Add(p1, false) should never be produced;
// if it is, it must be mapped to a NOOP.
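// [Editor's note: illustrative sketch, not part of the imported commit.]
// In i1 arithmetic "true" is the all-ones value -1, and addition of a
// single bit is xor, so p + true = p xor 1 = !p. That is why add(p1, -1)
// selects directly to C2_not in the pattern below:
//   def: Pat<(add (i1 PredRegs:$src1), -1), (C2_not PredRegs:$src1)>;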
-def : Pat <(add (i1 PredRegs:$src1), -1), - (C2_not (i1 PredRegs:$src1))>; +def: Pat<(add (i1 PredRegs:$src1), -1), + (C2_not PredRegs:$src1)>; // Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). -def : Pat <(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ImmPred:$src3), - (i32 (TFR_condset_ii (i1 PredRegs:$src1), s8ImmPred:$src3, - s8ImmPred:$src2))>; +def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s32ImmPred:$src3), + (C2_muxii PredRegs:$src1, s32ImmPred:$src3, s8ImmPred:$src2)>; // Map from p0 = pnot(p0); r0 = select(p0, #i, r1) -// => r0 = TFR_condset_ri(p0, r1, #i) -def : Pat <(select (not (i1 PredRegs:$src1)), s12ImmPred:$src2, - (i32 IntRegs:$src3)), - (i32 (TFR_condset_ri (i1 PredRegs:$src1), (i32 IntRegs:$src3), - s12ImmPred:$src2))>; +// => r0 = C2_muxir(p0, r1, #i) +def: Pat<(select (not (i1 PredRegs:$src1)), s32ImmPred:$src2, + (i32 IntRegs:$src3)), + (C2_muxir PredRegs:$src1, IntRegs:$src3, s32ImmPred:$src2)>; // Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) -// => r0 = TFR_condset_ir(p0, #i, r1) -def : Pat <(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s12ImmPred:$src3), - (i32 (TFR_condset_ir (i1 PredRegs:$src1), s12ImmPred:$src3, - (i32 IntRegs:$src2)))>; +// => r0 = C2_muxri (p0, #i, r1) +def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32ImmPred:$src3), + (C2_muxri PredRegs:$src1, s32ImmPred:$src3, IntRegs:$src2)>; // Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. -def : Pat <(brcond (not (i1 PredRegs:$src1)), bb:$offset), - (J2_jumpf (i1 PredRegs:$src1), bb:$offset)>; - -// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2). -def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))), - (i1 (C2_andn (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>; - - -let AddedComplexity = 100 in -def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))), - (i64 (A2_combinew (A2_tfrsi 0), - (L2_loadrub_io (CONST32_set tglobaladdr:$global), 0)))>, - Requires<[NoV4T]>; - -// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned. -let AddedComplexity = 10 in -def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)), - (i32 (A2_and (i32 (L2_loadrb_io AddrFI:$addr, 0)), (A2_tfrsi 0x1)))>; +def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset), + (J2_jumpf PredRegs:$src1, bb:$offset)>; // Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo). -def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)), - (i64 (A2_sxtw (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg))))>; +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)), + (A2_sxtw (LoReg DoubleRegs:$src1))>; -// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(SXTH(Rss.lo)). -def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)), - (i64 (A2_sxtw (i32 (A2_sxth (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), - subreg_loreg))))))>; +// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)), + (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>; -// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(SXTB(Rss.lo)). -def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)), - (i64 (A2_sxtw (i32 (A2_sxtb (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), - subreg_loreg))))))>; +// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)), + (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; // We want to prevent emitting pnot's as much as possible. 
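
Rather than materializing the negation, the maps that follow fold it into the consumer: a negated branch condition flips the jump sense (J2_jumpt vs. J2_jumpf), and a negated mux predicate swaps the mux operands, as the patterns above did. In C++ terms (a sketch, nothing Hexagon-specific):

// Both forms avoid computing !p as a separate value:
int fold_not(bool p) {
  if (!p) return 3;   // branch on !p == false-sense branch on p (J2_jumpf)
  return 7;
}
int swap_mux(bool p, int a, int b) {
  return !p ? a : b;  // same as p ? b : a: swap operands, drop the not
}
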
// Map brcond with an unsupported setcc to a J2_jumpf. @@ -4166,144 +4968,68 @@ def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)), bb:$offset), (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>; -def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), - (J2_jumpf (i1 PredRegs:$src1), bb:$offset)>; +def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), + (J2_jumpf PredRegs:$src1, bb:$offset)>; -def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), - (J2_jumpt (i1 PredRegs:$src1), bb:$offset)>; +def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), + (J2_jumpt PredRegs:$src1, bb:$offset)>; // cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) -def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpgti (i32 IntRegs:$src1), - (DEC_CONST_SIGNED s8ImmPred:$src2)), bb:$offset)>; - -// cmp.lt(r0, r1) -> cmp.gt(r1, r0) -def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - bb:$offset), - (J2_jumpt (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>; - -def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - bb:$offset), - (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)), - bb:$offset)>; - -def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - bb:$offset), - (J2_jumpf (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2)), - bb:$offset)>; - -def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - bb:$offset), - (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), - bb:$offset)>; +def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset), + (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)), + bb:$offset)>; // Map from a 64-bit select to an emulated 64-bit mux. // Hexagon does not support 64-bit MUXes; so emulate with combines. -def : Pat <(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), - (i64 DoubleRegs:$src3)), - (i64 (A2_combinew (i32 (C2_mux (i1 PredRegs:$src1), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - subreg_hireg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3), - subreg_hireg)))), - (i32 (C2_mux (i1 PredRegs:$src1), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - subreg_loreg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3), - subreg_loreg))))))>; +def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src3)), + (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2), + (HiReg DoubleRegs:$src3)), + (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2), + (LoReg DoubleRegs:$src3)))>; // Map from a 1-bit select to logical ops. // From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3). -def : Pat <(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), - (i1 PredRegs:$src3)), - (C2_or (C2_and (i1 PredRegs:$src1), (i1 PredRegs:$src2)), - (C2_and (C2_not (i1 PredRegs:$src1)), (i1 PredRegs:$src3)))>; - -// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs. -def : Pat<(i1 (load ADDRriS11_2:$addr)), - (i1 (C2_tfrrp (i32 (L2_loadrb_io AddrFI:$addr, 0))))>; +def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)), + (C2_or (C2_and PredRegs:$src1, PredRegs:$src2), + (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>; // Map for truncating from 64 immediates to 32 bit immediates. 
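
The 64-bit select above is decomposed into two 32-bit muxes plus a combine because there is no 64-bit mux; the truncation mapped next is cheaper still, a subregister copy with no instruction at all. A C++ sketch of both decompositions (illustrative):

#include <cstdint>
// select: one 32-bit mux per half, then recombine (C2_mux + A2_combinew).
uint64_t select64(bool p, uint64_t a, uint64_t b) {
  uint32_t hi = p ? uint32_t(a >> 32) : uint32_t(b >> 32);  // mux on HiReg
  uint32_t lo = p ? uint32_t(a) : uint32_t(b);              // mux on LoReg
  return (uint64_t(hi) << 32) | lo;
}
// trunc i64 -> i32: just the low half (LoReg), no computation needed.
int32_t trunc64(int64_t v) { return static_cast<int32_t>(v); }
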
-def : Pat<(i32 (trunc (i64 DoubleRegs:$src))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg))>; +def: Pat<(i32 (trunc (i64 DoubleRegs:$src))), + (LoReg DoubleRegs:$src)>; // Map for truncating from i64 immediates to i1 bit immediates. -def : Pat<(i1 (trunc (i64 DoubleRegs:$src))), - (i1 (C2_tfrrp (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg))))>; - -// Map memb(Rs) = Rdd -> memb(Rs) = Rt. -def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (S2_storerb_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg)))>; - -// Map memh(Rs) = Rdd -> memh(Rs) = Rt. -def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (S2_storerh_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg)))>; -// Map memw(Rs) = Rdd -> memw(Rs) = Rt -def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg)))>; - -// Map memw(Rs) = Rdd -> memw(Rs) = Rt. -def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg)))>; - -// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0. -def : Pat<(store (i1 -1), ADDRriS11_2:$addr), - (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>; - - -// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0. -def : Pat<(store (i1 -1), ADDRriS11_2:$addr), - (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>; - -// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt. -def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr), - (S2_storerb_io AddrFI:$addr, 0, (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0)) )>; - -// Map Rdd = anyext(Rs) -> Rdd = A2_sxtw(Rs). -// Hexagon_TODO: We can probably use combine but that will cost 2 instructions. -// Better way to do this? -def : Pat<(i64 (anyext (i32 IntRegs:$src1))), - (i64 (A2_sxtw (i32 IntRegs:$src1)))>; - -// Map cmple -> cmpgt. +def: Pat<(i1 (trunc (i64 DoubleRegs:$src))), + (C2_tfrrp (LoReg DoubleRegs:$src))>; + // rs <= rt -> !(rs > rt). -def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)), - (i1 (C2_not (C2_cmpgti (i32 IntRegs:$src1), s10ExtPred:$src2)))>; +let AddedComplexity = 30 in +def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>; // rs <= rt -> !(rs > rt). def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))), (i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; // Rss <= Rtt -> !(Rss > Rtt). -def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (i1 (C2_not (C2_cmpgtp (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>; +def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>; // Map cmpne -> cmpeq. // Hexagon_TODO: We should improve on this. // rs != rt -> !(rs == rt). -def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)), - (i1 (C2_not(i1 (C2_cmpeqi (i32 IntRegs:$src1), s10ExtPred:$src2))))>; - -// Map cmpne(Rs) -> !cmpeqe(Rs). -// rs != rt -> !(rs == rt). -def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (C2_not (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>; +let AddedComplexity = 30 in +def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpeqi IntRegs:$src1, s32ImmPred:$src2))>; // Convert setne back to xor for hexagon since we compute w/ pred registers. 
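
This works because on 1-bit values inequality and exclusive-or have the same truth table; a one-line C++ check (illustrative):

// (a != b) == (a ^ b) for booleans: 00->0, 01->1, 10->1, 11->0,
// so setne on predicate registers becomes C2_xor.
bool setne_i1(bool a, bool b) { return a != b; }
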
-def : Pat <(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
-      (i1 (C2_xor (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
+def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
+        (C2_xor PredRegs:$src1, PredRegs:$src2)>;

// Map cmpne(Rss, Rtt) -> !cmpeq(Rss, Rtt).
// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
-      (i1 (C2_not (i1 (C2_cmpeqp (i64 DoubleRegs:$src1),
-                                 (i64 DoubleRegs:$src2)))))>;
+def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+        (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;

// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
// rs >= rt -> !(rt > rs).
@@ -4311,366 +5037,120 @@ def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
      (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;

// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
-def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)),
-      (i1 (C2_cmpgti (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
+let AddedComplexity = 30 in
+def: Pat<(i1 (setge (i32 IntRegs:$src1), s32ImmPred:$src2)),
+        (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;

// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
// rss >= rtt -> !(rtt > rss).
-def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
-      (i1 (C2_not (i1 (C2_cmpgtp (i64 DoubleRegs:$src2),
-                                 (i64 DoubleRegs:$src1)))))>;
+def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+        (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>;

// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
// rs < rt -> !(rs >= rt).
-def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)),
-      (i1 (C2_not (C2_cmpgti (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2))))>;
-
-// Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs).
-// rs < rt -> rt > rs.
-// We can let assembler map it, or we can do in the compiler itself.
-def : Pat <(i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
-      (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
-
-// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
-// rss < rtt -> (rtt > rss).
-def : Pat <(i1 (setlt (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
-      (i1 (C2_cmpgtp (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
-
-// Map from cmpltu(Rs, Rd) -> cmpgtu(Rd, Rs)
-// rs < rt -> rt > rs.
-// We can let assembler map it, or we can do in the compiler itself.
-def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
-      (i1 (C2_cmpgtu (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
-
-// Map from cmpltu(Rss, Rdd) -> cmpgtu(Rdd, Rss).
-// rs < rt -> rt > rs.
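
All of the rewrites in this stretch rest on the same ordering identities, since only eq, gt, and gtu exist as compare instructions. A compact C++ summary (a sketch; the immediate form assumes the constant is not INT_MIN, so the decrement cannot underflow):

bool lt(int a, int b) { return b > a; }     // swap operands (C2_cmpgt)
bool le(int a, int b) { return !(a > b); }  // negate        (C2_not)
bool ge(int a, int b) { return !(b > a); }  // swap + negate
bool ge_imm(int a, int i) {                 // i > INT_MIN assumed
  return a > i - 1;                         // DEC_CONST_SIGNED
}
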
-def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (i1 (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>; +let AddedComplexity = 30 in +def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; // Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs) -def : Pat <(i1 (setuge (i32 IntRegs:$src1), 0)), - (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src1)))>; +def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)), + (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>; // Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1) -def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)), - (i1 (C2_cmpgtui (i32 IntRegs:$src1), (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>; +def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32ImmPred:$src2)), + (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32ImmPred:$src2))>; // Generate cmpgtu(Rs, #u9) -def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)), - (i1 (C2_cmpgtui (i32 IntRegs:$src1), u9ExtPred:$src2))>; +def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32ImmPred:$src2)), + (C2_cmpgtui IntRegs:$src1, u32ImmPred:$src2)>; // Map from Rs >= Rt -> !(Rt > Rs). // rs >= rt -> !(rt > rs). -def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (C2_not (C2_cmpgtu (i32 IntRegs:$src2), (i32 IntRegs:$src1))))>; - -// Map from Rs >= Rt -> !(Rt > Rs). -// rs >= rt -> !(rt > rs). -def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (i1 (C2_not (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>; - -// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt). -// Map from (Rs <= Rt) -> !(Rs > Rt). -def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (C2_not (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; +def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>; // Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1). // Map from (Rs <= Rt) -> !(Rs > Rt). -def : Pat <(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (i1 (C2_not (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>; +def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>; // Sign extends. // i1 -> i32 -def : Pat <(i32 (sext (i1 PredRegs:$src1))), - (i32 (C2_muxii (i1 PredRegs:$src1), -1, 0))>; +def: Pat<(i32 (sext (i1 PredRegs:$src1))), + (C2_muxii PredRegs:$src1, -1, 0)>; // i1 -> i64 -def : Pat <(i64 (sext (i1 PredRegs:$src1))), - (i64 (A2_combinew (A2_tfrsi -1), (C2_muxii (i1 PredRegs:$src1), -1, 0)))>; - -// Convert sign-extended load back to load and sign extend. -// i8 -> i64 -def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)), - (i64 (A2_sxtw (L2_loadrb_io AddrFI:$src1, 0)))>; - -// Convert any-extended load back to load and sign extend. -// i8 -> i64 -def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)), - (i64 (A2_sxtw (L2_loadrb_io AddrFI:$src1, 0)))>; - -// Convert sign-extended load back to load and sign extend. -// i16 -> i64 -def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)), - (i64 (A2_sxtw (L2_loadrh_io AddrFI:$src1, 0)))>; - -// Convert sign-extended load back to load and sign extend. -// i32 -> i64 -def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)), - (i64 (A2_sxtw (L2_loadri_io AddrFI:$src1, 0)))>; - +def: Pat<(i64 (sext (i1 PredRegs:$src1))), + (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>; // Zero extends. 
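
Sign- and zero-extension of a predicate differ only in the constants fed to the mux, as the adjacent patterns show. A C++ sketch (illustrative):

#include <cstdint>
int32_t sext_i1(bool p) { return p ? -1 : 0; }  // C2_muxii(p, -1, 0)
int32_t zext_i1(bool p) { return p ?  1 : 0; }  // C2_muxii(p,  1, 0)
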
// i1 -> i32 -def : Pat <(i32 (zext (i1 PredRegs:$src1))), - (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))>; - -// i1 -> i64 -def : Pat <(i64 (zext (i1 PredRegs:$src1))), - (i64 (A2_combinew (A2_tfrsi 0), (C2_muxii (i1 PredRegs:$src1), 1, 0)))>, - Requires<[NoV4T]>; - -// i32 -> i64 -def : Pat <(i64 (zext (i32 IntRegs:$src1))), - (i64 (A2_combinew (A2_tfrsi 0), (i32 IntRegs:$src1)))>, - Requires<[NoV4T]>; - -// i8 -> i64 -def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), - (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io AddrFI:$src1, 0)))>, - Requires<[NoV4T]>; - -let AddedComplexity = 20 in -def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1), - s11_0ExtPred:$offset))), - (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io IntRegs:$src1, - s11_0ExtPred:$offset)))>, - Requires<[NoV4T]>; - -// i1 -> i64 -def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)), - (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io AddrFI:$src1, 0)))>, - Requires<[NoV4T]>; - -let AddedComplexity = 20 in -def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1), - s11_0ExtPred:$offset))), - (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io IntRegs:$src1, - s11_0ExtPred:$offset)))>, - Requires<[NoV4T]>; - -// i16 -> i64 -def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), - (i64 (A2_combinew (A2_tfrsi 0), (L2_loadruh_io AddrFI:$src1, 0)))>, - Requires<[NoV4T]>; - -let AddedComplexity = 20 in -def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1), - s11_1ExtPred:$offset))), - (i64 (A2_combinew (A2_tfrsi 0), (L2_loadruh_io IntRegs:$src1, - s11_1ExtPred:$offset)))>, - Requires<[NoV4T]>; - -// i32 -> i64 -def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), - (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io AddrFI:$src1, 0)))>, - Requires<[NoV4T]>; - -let AddedComplexity = 100 in -def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), - (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io IntRegs:$src1, - s11_2ExtPred:$offset)))>, - Requires<[NoV4T]>; - -let AddedComplexity = 10 in -def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)), - (i32 (L2_loadri_io AddrFI:$src1, 0))>; +def: Pat<(i32 (zext (i1 PredRegs:$src1))), + (C2_muxii PredRegs:$src1, 1, 0)>; // Map from Rs = Pd to Pd = mux(Pd, #1, #0) -def : Pat <(i32 (zext (i1 PredRegs:$src1))), - (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))>; - -// Map from Rs = Pd to Pd = mux(Pd, #1, #0) -def : Pat <(i32 (anyext (i1 PredRegs:$src1))), - (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))>; - -// Map from Rss = Pd to Rdd = A2_sxtw (mux(Pd, #1, #0)) -def : Pat <(i64 (anyext (i1 PredRegs:$src1))), - (i64 (A2_sxtw (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))))>; - - -let AddedComplexity = 100 in -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zextloadi32 (i32 (add IntRegs:$src2, - s11_2ExtPred:$offset2)))))), - (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - (L2_loadri_io IntRegs:$src2, - s11_2ExtPred:$offset2)))>; - -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zextloadi32 ADDRriS11_2:$srcLow)))), - (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - (L2_loadri_io AddrFI:$srcLow, 0)))>; - -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zext (i32 IntRegs:$srcLow))))), - (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - IntRegs:$srcLow))>; - -let AddedComplexity = 100 in -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zextloadi32 (i32 (add IntRegs:$src2, - s11_2ExtPred:$offset2)))))), - (i64 (A2_combinew 
(EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - (L2_loadri_io IntRegs:$src2, - s11_2ExtPred:$offset2)))>; - -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zextloadi32 ADDRriS11_2:$srcLow)))), - (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - (L2_loadri_io AddrFI:$srcLow, 0)))>; - -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zext (i32 IntRegs:$srcLow))))), - (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - IntRegs:$srcLow))>; - -// Any extended 64-bit load. -// anyext i32 -> i64 -def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), - (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io AddrFI:$src1, 0)))>, - Requires<[NoV4T]>; - -// When there is an offset we should prefer the pattern below over the pattern above. -// The complexity of the above is 13 (gleaned from HexagonGenDAGIsel.inc) -// So this complexity below is comfortably higher to allow for choosing the below. -// If this is not done then we generate addresses such as -// ******************************************** -// r1 = add (r0, #4) -// r1 = memw(r1 + #0) -// instead of -// r1 = memw(r0 + #4) -// ******************************************** -let AddedComplexity = 100 in -def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), - (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io IntRegs:$src1, - s11_2ExtPred:$offset)))>, - Requires<[NoV4T]>; - -// anyext i16 -> i64. -def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), - (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrh_io AddrFI:$src1, 0)))>, - Requires<[NoV4T]>; +def: Pat<(i32 (anyext (i1 PredRegs:$src1))), + (C2_muxii PredRegs:$src1, 1, 0)>; -let AddedComplexity = 20 in -def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1), - s11_1ExtPred:$offset))), - (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrh_io IntRegs:$src1, - s11_1ExtPred:$offset)))>, - Requires<[NoV4T]>; - -// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs). -def : Pat<(i64 (zext (i32 IntRegs:$src1))), - (i64 (A2_combinew (A2_tfrsi 0), (i32 IntRegs:$src1)))>, - Requires<[NoV4T]>; +// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0)) +def: Pat<(i64 (anyext (i1 PredRegs:$src1))), + (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>; // Multiply 64-bit unsigned and use upper result. def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), - (i64 - (M2_dpmpyuu_acc_s0 - (i64 - (A2_combinew - (A2_tfrsi 0), - (i32 - (EXTRACT_SUBREG - (i64 - (S2_lsr_i_p - (i64 - (M2_dpmpyuu_acc_s0 - (i64 - (M2_dpmpyuu_acc_s0 - (i64 - (A2_combinew (A2_tfrsi 0), - (i32 - (EXTRACT_SUBREG - (i64 - (S2_lsr_i_p - (i64 - (M2_dpmpyuu_s0 - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), - subreg_loreg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - subreg_loreg)))), 32)), - subreg_loreg)))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))), - 32)), subreg_loreg)))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>; - -// Multiply 64-bit signed and use upper result. 
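
Both the unsigned form above and the signed form below compute the upper 64 bits of a 128-bit product out of four 32x32 partial products. Plain C++ reproducing the unsigned arithmetic that the pattern composes from M2_dpmpyuu and shifted adds (a sketch, no Hexagon intrinsics):

#include <cstdint>
uint64_t mulhu64(uint64_t a, uint64_t b) {
  uint64_t alo = a & 0xffffffffu, ahi = a >> 32;
  uint64_t blo = b & 0xffffffffu, bhi = b >> 32;
  uint64_t ll = alo * blo;                       // lo*lo
  uint64_t hl = ahi * blo + (ll >> 32);          // += hi*lo plus carry-in
  uint64_t lh = alo * bhi + (hl & 0xffffffffu);  // += lo*hi
  return ahi * bhi + (hl >> 32) + (lh >> 32);    // hi*hi plus both carries
}
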
-def : Pat <(mulhs (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), - (i64 - (M2_dpmpyss_acc_s0 - (i64 - (A2_combinew (A2_tfrsi 0), - (i32 - (EXTRACT_SUBREG - (i64 - (S2_lsr_i_p - (i64 - (M2_dpmpyss_acc_s0 - (i64 - (M2_dpmpyss_acc_s0 - (i64 - (A2_combinew (A2_tfrsi 0), - (i32 - (EXTRACT_SUBREG - (i64 - (S2_lsr_i_p - (i64 - (M2_dpmpyuu_s0 - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), - subreg_loreg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - subreg_loreg)))), 32)), - subreg_loreg)))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))), - 32)), subreg_loreg)))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>; + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (LoReg $src2)), 32), + (HiReg $src1), + (LoReg $src2)), + (A2_combinew (A2_tfrsi 0), + (LoReg (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2))))), + 32), + (HiReg $src1), + (HiReg $src2)), + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2)), 32) +)>; // Hexagon specific ISD nodes. -//def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>; -def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, - [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC", - SDTHexagonADJDYNALLOC>; -// Needed to tag these instructions for stack layout. -let usesCustomInserter = 1 in -def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1, - s16Imm:$src2), - "$dst = add($src1, #$src2)", - [(set (i32 IntRegs:$dst), - (Hexagon_ADJDYNALLOC (i32 IntRegs:$src1), - s16ImmPred:$src2))]>; +def SDTHexagonALLOCA : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, + [SDNPHasChain]>; + +// The reason for the custom inserter is to record all ALLOCA instructions +// in MachineFunctionInfo. 
+let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1, + usesCustomInserter = 1 in +def ALLOCA: ALU32Inst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, u32Imm:$A), "", + [(set (i32 IntRegs:$Rd), + (HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)))]>; + +let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in +def ALIGNA : ALU32Inst<(outs IntRegs:$Rd), (ins u32Imm:$A), "", []>; def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>; +let isCodeGenOnly = 1 in def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = $src1", [(set (i32 IntRegs:$dst), (Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>; let AddedComplexity = 100 in -def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), - (COPY (i32 IntRegs:$src1))>; +def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), + (i32 IntRegs:$src1)>; -def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>; +def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; +def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; -def : Pat<(HexagonWrapperJT tjumptable:$dst), - (i32 (CONST32_set_jt tjumptable:$dst))>; +def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi s16Ext:$dst)>; +def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi s16Ext:$dst)>; // XTYPE/SHIFT // @@ -4820,7 +5300,6 @@ let AddedComplexity = 100 in defm _xacc : xtype_imm_base< opc1, "^= ", OpNode, xor, 0b100, minOp>; } -let isCodeGenOnly = 0 in { defm S2_asr : xtype_imm_acc<"asr", sra, 0b00>; defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>, @@ -4828,7 +5307,6 @@ defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>, defm S2_asl : xtype_imm_acc<"asl", shl, 0b10>, xtype_xor_imm_acc<"asl", shl, 0b10>; -} multiclass xtype_reg_acc_r<string opc1, SDNode OpNode, bits<2>minOp> { let AddedComplexity = 100 in @@ -4854,12 +5332,10 @@ multiclass xtype_reg_acc<string OpcStr, SDNode OpNode, bits<2> minOp > { defm _r_p : xtype_reg_acc_p <OpcStr, OpNode, minOp>; } -let isCodeGenOnly = 0 in { defm S2_asl : xtype_reg_acc<"asl", shl, 0b10>; defm S2_asr : xtype_reg_acc<"asr", sra, 0b00>; defm S2_lsr : xtype_reg_acc<"lsr", srl, 0b01>; defm S2_lsl : xtype_reg_acc<"lsl", shl, 0b11>; -} //===----------------------------------------------------------------------===// let hasSideEffects = 0 in @@ -4890,9 +5366,42 @@ class T_S3op_64 <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit SwapOps, : T_S3op_1 <mnemonic, DoubleRegs, MajOp, MinOp, SwapOps, isSat, isRnd, hasShift>; -let isCodeGenOnly = 0 in +let Itinerary = S_3op_tc_1_SLOT23 in { + def S2_shuffeb : T_S3op_64 < "shuffeb", 0b00, 0b010, 0>; + def S2_shuffeh : T_S3op_64 < "shuffeh", 0b00, 0b110, 0>; + def S2_shuffob : T_S3op_64 < "shuffob", 0b00, 0b100, 1>; + def S2_shuffoh : T_S3op_64 < "shuffoh", 0b10, 0b000, 1>; + + def S2_vtrunewh : T_S3op_64 < "vtrunewh", 0b10, 0b010, 0>; + def S2_vtrunowh : T_S3op_64 < "vtrunowh", 0b10, 0b100, 0>; +} + def S2_lfsp : T_S3op_64 < "lfs", 0b10, 0b110, 0>; +let hasSideEffects = 0 in +class T_S3op_2 <string mnemonic, bits<3> MajOp, bit SwapOps> + : SInst < (outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu), + "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu)", + [], "", S_3op_tc_1_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + bits<2> Pu; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0010; + let Inst{23-21} = MajOp; + let Inst{20-16} = !if (SwapOps, Rtt, Rss); + let Inst{12-8} = !if (SwapOps, Rss, Rtt); + let Inst{6-5} = Pu; + let Inst{4-0} = 
Rdd; + } + +def S2_valignrb : T_S3op_2 < "valignb", 0b000, 1>; +def S2_vsplicerb : T_S3op_2 < "vspliceb", 0b100, 0>; + //===----------------------------------------------------------------------===// // Template class used by vector shift, vector rotate, vector neg, // 32-bit shift, 64-bit shifts, etc. @@ -4943,31 +5452,78 @@ class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp> // Shift by register // Rdd=[asr|lsr|asl|lsl](Rss,Rt) -let isCodeGenOnly = 0 in { def S2_asr_r_p : T_S3op_shift64 < "asr", sra, 0b00>; def S2_lsr_r_p : T_S3op_shift64 < "lsr", srl, 0b01>; def S2_asl_r_p : T_S3op_shift64 < "asl", shl, 0b10>; def S2_lsl_r_p : T_S3op_shift64 < "lsl", shl, 0b11>; -} // Rd=[asr|lsr|asl|lsl](Rs,Rt) -let isCodeGenOnly = 0 in { def S2_asr_r_r : T_S3op_shift32<"asr", sra, 0b00>; def S2_lsr_r_r : T_S3op_shift32<"lsr", srl, 0b01>; def S2_asl_r_r : T_S3op_shift32<"asl", shl, 0b10>; def S2_lsl_r_r : T_S3op_shift32<"lsl", shl, 0b11>; -} // Shift by register with saturation // Rd=asr(Rs,Rt):sat // Rd=asl(Rs,Rt):sat -let Defs = [USR_OVF], isCodeGenOnly = 0 in { +let Defs = [USR_OVF] in { def S2_asr_r_r_sat : T_S3op_shift32_Sat<"asr", 0b00>; def S2_asl_r_r_sat : T_S3op_shift32_Sat<"asl", 0b10>; } +let hasNewValue = 1, hasSideEffects = 0 in +class T_S3op_8 <string opc, bits<3> MinOp, bit isSat, bit isRnd, bit hasShift, bit hasSplat = 0> + : SInst < (outs IntRegs:$Rd), + (ins DoubleRegs:$Rss, IntRegs:$Rt), + "$Rd = "#opc#"($Rss, $Rt"#!if(hasSplat, "*", "")#")" + #!if(hasShift, ":<<1", "") + #!if(isRnd, ":rnd", "") + #!if(isSat, ":sat", ""), + [], "", S_3op_tc_1_SLOT23 > { + bits<5> Rd; + bits<5> Rss; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0101; + let Inst{20-16} = Rss; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; + } + +def S2_asr_r_svw_trun : T_S3op_8<"vasrw", 0b010, 0, 0, 0>; + +let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in +def S2_vcrotate : T_S3op_shiftVect < "vcrotate", 0b11, 0b00>; + +let hasSideEffects = 0 in +class T_S3op_7 <string mnemonic, bit MajOp > + : SInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3Imm:$u3), + "$Rdd = "#mnemonic#"($Rss, $Rtt, #$u3)" , + [], "", S_3op_tc_1_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + bits<3> u3; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0000; + let Inst{23} = MajOp; + let Inst{20-16} = !if(MajOp, Rss, Rtt); + let Inst{12-8} = !if(MajOp, Rtt, Rss); + let Inst{7-5} = u3; + let Inst{4-0} = Rdd; + } + +def S2_valignib : T_S3op_7 < "valignb", 0>; +def S2_vspliceib : T_S3op_7 < "vspliceb", 1>; + //===----------------------------------------------------------------------===// // Template class for 'insert bitfield' instructions //===----------------------------------------------------------------------===// @@ -5021,17 +5577,45 @@ class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp> // Rx=insert(Rs,Rtt) // Rx=insert(Rs,#u5,#U5) -let hasNewValue = 1, isCodeGenOnly = 0 in { +let hasNewValue = 1 in { def S2_insert_rp : T_S3op_insert <"insert", IntRegs>; def S2_insert : T_S2op_insert <0b1111, IntRegs, u5Imm>; } // Rxx=insert(Rss,Rtt) // Rxx=insert(Rss,#u6,#U6) -let isCodeGenOnly = 0 in { def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>; def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>; -} + + +def SDTHexagonINSERT: + SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; +def SDTHexagonINSERTRP: + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, 
SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i64>]>; + +def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; +def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; + +def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2), + (S2_insert I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2)>; +def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2), + (S2_insertp I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2)>; +def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), + (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; +def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), + (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; + +let AddedComplexity = 100 in +def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))), + (i32 (extloadi8 (add I32:$b, 3))), + 24, 8), + (i32 16)), + (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), + (zextloadi8 I32:$b)), + (A2_swiz (L2_loadri_io I32:$b, 0))>; + //===----------------------------------------------------------------------===// // Template class for 'extract bitfield' instructions @@ -5089,18 +5673,39 @@ class T_S2op_extract <string mnemonic, bits<4> RegTyBits, // Rdd=extractu(Rss,Rtt) // Rdd=extractu(Rss,#u6,#U6) -let isCodeGenOnly = 0 in { def S2_extractup_rp : T_S3op_64 < "extractu", 0b00, 0b000, 0>; def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6Imm>; -} // Rd=extractu(Rs,Rtt) // Rd=extractu(Rs,#u5,#U5) -let hasNewValue = 1, isCodeGenOnly = 0 in { +let hasNewValue = 1 in { def S2_extractu_rp : T_S3op_extract<"extractu", 0b00>; def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>; } +def SDTHexagonEXTRACTU: + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDTHexagonEXTRACTURP: + SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i64>]>; + +def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; +def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; + +def: Pat<(HexagonEXTRACTU I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3), + (S2_extractu I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3)>; +def: Pat<(HexagonEXTRACTU I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3), + (S2_extractup I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3)>; +def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2), + (S2_extractu_rp I32:$src1, I64:$src2)>; +def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2), + (S2_extractup_rp I64:$src1, I64:$src2)>; + +// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) +def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)), + (M2_mpysin IntRegs:$src1, u8ImmPred:$src2)>; + //===----------------------------------------------------------------------===// // :raw for of tableindx[bdhw] insns //===----------------------------------------------------------------------===// @@ -5127,16 +5732,26 @@ class tableidxRaw<string OpStr, bits<2>MinOp> let Inst{4-0} = Rx; } -let isCodeGenOnly = 0 in { def S2_tableidxb : tableidxRaw<"tableidxb", 0b00>; def S2_tableidxh : tableidxRaw<"tableidxh", 0b01>; def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>; def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>; -} -// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) -def : Pat <(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)), - (i32 (M2_mpysin (i32 IntRegs:$src1), u8ImmPred:$src2))>; +//===----------------------------------------------------------------------===// +// Template class for 'table index' instructions which are 
assembler mapped +// to their :raw format. +//===----------------------------------------------------------------------===// +let isPseudo = 1 in +class tableidx_goodsyntax <string mnemonic> + : SInst <(outs IntRegs:$Rx), + (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, u5Imm:$u5), + "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)", + [], "$Rx = $_dst_" >; + +def S2_tableidxb_goodsyntax : tableidx_goodsyntax<"tableidxb">; +def S2_tableidxh_goodsyntax : tableidx_goodsyntax<"tableidxh">; +def S2_tableidxw_goodsyntax : tableidx_goodsyntax<"tableidxw">; +def S2_tableidxd_goodsyntax : tableidx_goodsyntax<"tableidxd">; //===----------------------------------------------------------------------===// // V3 Instructions + @@ -5167,3 +5782,9 @@ include "HexagonInstrInfoV5.td" //===----------------------------------------------------------------------===// // V5 Instructions - //===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU32/64/Vector + +//===----------------------------------------------------------------------===/// + +include "HexagonInstrInfoVector.td" diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td index 8e9147600fa6..84d035da451b 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV3.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -21,8 +21,7 @@ def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall, // J + //===----------------------------------------------------------------------===// // Call subroutine. -let isCall = 1, hasSideEffects = 1, validSubTargets = HasV3SubT, - Defs = VolatileV3.Regs, isPredicable = 1, +let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicable = 1, isExtended = 0, isExtendable = 1, opExtendable = 0, isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in class T_Call<string ExtStr> @@ -37,8 +36,7 @@ class T_Call<string ExtStr> let Inst{0} = 0b0; } -let isCall = 1, hasSideEffects = 1, validSubTargets = HasV3SubT, - Defs = VolatileV3.Regs, isPredicated = 1, +let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicated = 1, isExtended = 0, isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 17, opExtentAlign = 2 in class T_CallPred<bit IfTrue, string ExtStr> @@ -64,9 +62,11 @@ multiclass T_Calls<string ExtStr> { def f : T_CallPred<0, ExtStr>; } -let isCodeGenOnly = 0 in defm J2_call: T_Calls<"">, PredRel; +let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs in +def CALLv3nr : T_Call<"">, PredRel; + //===----------------------------------------------------------------------===// // J - //===----------------------------------------------------------------------===// @@ -76,13 +76,10 @@ defm J2_call: T_Calls<"">, PredRel; // JR + //===----------------------------------------------------------------------===// // Call subroutine from register. -let isCall = 1, hasSideEffects = 0, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31, - P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst), - "callr $dst", - []>, Requires<[HasV3TOnly]>; - } + +let isCodeGenOnly = 1, Defs = VolatileV3.Regs in { + def CALLRv3nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return. 
+} //===----------------------------------------------------------------------===// // JR - @@ -92,20 +89,16 @@ let isCall = 1, hasSideEffects = 0, // ALU64/ALU + //===----------------------------------------------------------------------===// - -let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23, - validSubTargets = HasV3SubT, isCodeGenOnly = 0 in +let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23 in def A2_addpsat : T_ALU64_arith<"add", 0b011, 0b101, 1, 0, 1>; class T_ALU64_addsp_hl<string suffix, bits<3> MinOp> : T_ALU64_rr<"add", suffix, 0b0011, 0b011, MinOp, 0, 0, "">; -let isCodeGenOnly = 0 in { def A2_addspl : T_ALU64_addsp_hl<":raw:lo", 0b110>; def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>; -} -let hasSideEffects = 0, isCodeGenOnly = 0 in +let hasSideEffects = 0, isAsmParserOnly = 1 in def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd), (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)", [(set (i64 DoubleRegs:$Rd), (i64 (add (i64 (sext (i32 IntRegs:$Rs))), @@ -134,12 +127,10 @@ class T_XTYPE_MIN_MAX_P<bit isMax, bit isUnsigned> let Inst{4-0} = Rd; } -let isCodeGenOnly = 0 in { def A2_minp : T_XTYPE_MIN_MAX_P<0, 0>; def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>; def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>; def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>; -} multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> { defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>; @@ -164,25 +155,112 @@ let AddedComplexity = 200 in { //def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; +// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>; //def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; +// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>; //def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; +// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>; //def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; +// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>; //def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset), -// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; - +// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>; // Map call instruction -def : Pat<(call (i32 IntRegs:$dst)), - (J2_call (i32 IntRegs:$dst))>, Requires<[HasV3T]>; -def : Pat<(call tglobaladdr:$dst), - (J2_call tglobaladdr:$dst)>, Requires<[HasV3T]>; -def : Pat<(call texternalsym:$dst), - (J2_call texternalsym:$dst)>, Requires<[HasV3T]>; +def : Pat<(callv3 (i32 IntRegs:$dst)), + (J2_callr (i32 IntRegs:$dst))>; +def : Pat<(callv3 tglobaladdr:$dst), + (J2_call tglobaladdr:$dst)>; +def : Pat<(callv3 texternalsym:$dst), + (J2_call texternalsym:$dst)>; +def : Pat<(callv3 tglobaltlsaddr:$dst), + (J2_call tglobaltlsaddr:$dst)>; + +def : Pat<(callv3nr (i32 IntRegs:$dst)), + (CALLRv3nr (i32 IntRegs:$dst))>; +def : Pat<(callv3nr tglobaladdr:$dst), + (CALLv3nr tglobaladdr:$dst)>; +def : Pat<(callv3nr texternalsym:$dst), + (CALLv3nr texternalsym:$dst)>; + +//===----------------------------------------------------------------------===// +// :raw form of vrcmpys:hi/lo insns +//===----------------------------------------------------------------------===// +// Vector reduce complex multiply by scalar. 
+let Defs = [USR_OVF], hasSideEffects = 0 in +class T_vrcmpRaw<string HiLo, bits<3>MajOp>: + MInst<(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd = vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, []> { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{7-5} = 0b100; + let Inst{4-0} = Rdd; +} + +def M2_vrcmpys_s1_h: T_vrcmpRaw<"hi", 0b101>; +def M2_vrcmpys_s1_l: T_vrcmpRaw<"lo", 0b111>; + +// Assembler mapped to M2_vrcmpys_s1_h or M2_vrcmpys_s1_l +let hasSideEffects = 0, isAsmParserOnly = 1 in +def M2_vrcmpys_s1 + : MInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, IntRegs:$Rt), + "$Rdd=vrcmpys($Rss,$Rt):<<1:sat">; + +// Vector reduce complex multiply by scalar with accumulation. +let Defs = [USR_OVF], hasSideEffects = 0 in +class T_vrcmpys_acc<string HiLo, bits<3>MajOp>: + MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$_src_, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx += vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, [], + "$Rxx = $_src_"> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{7-5} = 0b100; + let Inst{4-0} = Rxx; + } + +def M2_vrcmpys_acc_s1_h: T_vrcmpys_acc<"hi", 0b101>; +def M2_vrcmpys_acc_s1_l: T_vrcmpys_acc<"lo", 0b111>; + +// Assembler mapped to M2_vrcmpys_acc_s1_h or M2_vrcmpys_acc_s1_l + +let isAsmParserOnly = 1 in +def M2_vrcmpys_acc_s1 + : MInst <(outs DoubleRegs:$dst), + (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2), + "$dst += vrcmpys($src1, $src2):<<1:sat", [], + "$dst2 = $dst">; + +def M2_vrcmpys_s1rp_h : T_MType_vrcmpy <"vrcmpys", 0b101, 0b110, 1>; +def M2_vrcmpys_s1rp_l : T_MType_vrcmpy <"vrcmpys", 0b101, 0b111, 0>; + +// Assembler mapped to M2_vrcmpys_s1rp_h or M2_vrcmpys_s1rp_l +let isAsmParserOnly = 1 in +def M2_vrcmpys_s1rp + : MInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, IntRegs:$Rt), + "$Rd=vrcmpys($Rss,$Rt):<<1:rnd:sat">; + + +// S2_cabacdecbin: Cabac decode bin. +let Defs = [P0], isPredicateLate = 1, Itinerary = S_3op_tc_1_SLOT23 in +def S2_cabacdecbin : T_S3op_64 < "decbin", 0b11, 0b110, 0>; diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 08bfd676fed8..8b667c645156 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -11,6 +11,28 @@ // //===----------------------------------------------------------------------===// +def DuplexIClass0: InstDuplex < 0 >; +def DuplexIClass1: InstDuplex < 1 >; +def DuplexIClass2: InstDuplex < 2 >; +let isExtendable = 1 in { + def DuplexIClass3: InstDuplex < 3 >; + def DuplexIClass4: InstDuplex < 4 >; + def DuplexIClass5: InstDuplex < 5 >; + def DuplexIClass6: InstDuplex < 6 >; + def DuplexIClass7: InstDuplex < 7 >; +} +def DuplexIClass8: InstDuplex < 8 >; +def DuplexIClass9: InstDuplex < 9 >; +def DuplexIClassA: InstDuplex < 0xA >; +def DuplexIClassB: InstDuplex < 0xB >; +def DuplexIClassC: InstDuplex < 0xC >; +def DuplexIClassD: InstDuplex < 0xD >; +def DuplexIClassE: InstDuplex < 0xE >; +def DuplexIClassF: InstDuplex < 0xF >; + +def addrga: PatLeaf<(i32 AddrGA:$Addr)>; +def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; + let hasSideEffects = 0 in class T_Immext<Operand ImmType> : EXTENDERInst<(outs), (ins ImmType:$imm), @@ -35,19 +57,9 @@ def BITPOS32 : SDNodeXForm<imm, [{ // Return the bit position we will set [0-31]. 
// As an SDNode. int32_t imm = N->getSExtValue(); - return XformMskToBitPosU5Imm(imm); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); }]>; -// Fold (add (CONST32 tglobaladdr:$addr) <offset>) into a global address. -def FoldGlobalAddr : ComplexPattern<i32, 1, "foldGlobalAddress", [], []>; - -// Fold (add (CONST32_GP tglobaladdr:$addr) <offset>) into a global address. -def FoldGlobalAddrGP : ComplexPattern<i32, 1, "foldGlobalAddressGP", [], []>; - -def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr), - (HexagonCONST32 node:$addr), [{ - return hasNumUsesBelowThresGA(N->getOperand(0).getNode()); -}]>; // Hexagon V4 Architecture spec defines 8 instruction classes: // LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the @@ -119,21 +131,19 @@ class T_ALU32_3op_not<string mnemonic, bits<3> MajOp, bits<3> MinOp, let AsmString = "$Rd = "#mnemonic#"($Rs, ~$Rt)"; } -let BaseOpcode = "andn_rr", CextOpcode = "andn", isCodeGenOnly = 0 in +let BaseOpcode = "andn_rr", CextOpcode = "andn" in def A4_andn : T_ALU32_3op_not<"and", 0b001, 0b100, 1>; -let BaseOpcode = "orn_rr", CextOpcode = "orn", isCodeGenOnly = 0 in +let BaseOpcode = "orn_rr", CextOpcode = "orn" in def A4_orn : T_ALU32_3op_not<"or", 0b001, 0b101, 1>; -let CextOpcode = "rcmp.eq", isCodeGenOnly = 0 in +let CextOpcode = "rcmp.eq" in def A4_rcmpeq : T_ALU32_3op<"cmp.eq", 0b011, 0b010, 0, 1>; -let CextOpcode = "!rcmp.eq", isCodeGenOnly = 0 in +let CextOpcode = "!rcmp.eq" in def A4_rcmpneq : T_ALU32_3op<"!cmp.eq", 0b011, 0b011, 0, 1>; -let isCodeGenOnly = 0 in { def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>; def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>; def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>; -} // Pats for instruction selection. @@ -146,11 +156,15 @@ class CmpInReg<PatFrag Op> def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>; def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>; +def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>; +def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>; + +def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>; + class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm> : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>, ImmRegRel { - let validSubTargets = HasV4SubT; let InputType = "reg"; let CextOpcode = mnemonic; let isCompare = 1; @@ -169,13 +183,26 @@ class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm> let Inst{1-0} = Pd; } -let isCodeGenOnly = 0 in { def A4_cmpbeq : T_CMP_rrbh<"cmpb.eq", 0b110, 1>; def A4_cmpbgt : T_CMP_rrbh<"cmpb.gt", 0b010, 0>; def A4_cmpbgtu : T_CMP_rrbh<"cmpb.gtu", 0b111, 0>; def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>; def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>; def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>; + +let AddedComplexity = 100 in { + def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 255), 0)), + (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 255), 0)), + (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 65535), 0)), + (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 65535), 0)), + (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; } class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm, @@ -183,7 +210,6 @@ class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm, : ALU64Inst<(outs 
PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm), "$Pd = "#mnemonic#"($Rs, #$Imm)", [], "", ALU64_tc_2early_SLOT23>, ImmRegRel { - let validSubTargets = HasV4SubT; let InputType = "imm"; let CextOpcode = mnemonic; let isCompare = 1; @@ -208,19 +234,17 @@ class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm, let Inst{1-0} = Pd; } -let isCodeGenOnly = 0 in { def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8Imm, 0, 0, 8>; def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8Imm, 0, 1, 8>; def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7Ext, 1, 0, 7>; def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 1, 1, s8Ext, 1, 1, 8>; def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8Ext, 1, 1, 8>; def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7Ext, 1, 0, 7>; -} + class T_RCMP_EQ_ri<string mnemonic, bit IsNeg> : ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8Ext:$s8), "$Rd = "#mnemonic#"($Rs, #$s8)", [], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel { - let validSubTargets = HasV4SubT; let InputType = "imm"; let CextOpcode = !if (IsNeg, "!rcmp.eq", "rcmp.eq"); let isExtendable = 1; @@ -243,22 +267,19 @@ class T_RCMP_EQ_ri<string mnemonic, bit IsNeg> let Inst{4-0} = Rd; } -let isCodeGenOnly = 0 in { def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>; def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>; -} -def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s8ExtPred:$s8)))), - (A4_rcmpeqi IntRegs:$Rs, s8ExtPred:$s8)>; -def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s8ExtPred:$s8)))), - (A4_rcmpneqi IntRegs:$Rs, s8ExtPred:$s8)>; +def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32ImmPred:$s8)))), + (A4_rcmpeqi IntRegs:$Rs, s32ImmPred:$s8)>; +def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32ImmPred:$s8)))), + (A4_rcmpneqi IntRegs:$Rs, s32ImmPred:$s8)>; // Preserve the S2_tstbit_r generation def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))), (i32 IntRegs:$src1))), 0)))), (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>; - //===----------------------------------------------------------------------===// // ALU32 - //===----------------------------------------------------------------------===// @@ -286,26 +307,23 @@ class T_Combine1 <bits<2> MajOp, dag ins, string AsmStr> let Inst{4-0} = Rdd; } -let opExtendable = 2, isCodeGenOnly = 0 in +let opExtendable = 2 in def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8Ext:$s8), "$Rdd = combine($Rs, #$s8)">; -let opExtendable = 1, isCodeGenOnly = 0 in +let opExtendable = 1 in def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs), "$Rdd = combine(#$s8, $Rs)">; -def HexagonWrapperCombineRI_V4 : - SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>; -def HexagonWrapperCombineIR_V4 : - SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>; +// The complexity of the combines involving immediates should be greater +// than the complexity of the combine with two registers. +let AddedComplexity = 50 in { +def: Pat<(HexagonCOMBINE IntRegs:$r, s32ImmPred:$i), + (A4_combineri IntRegs:$r, s32ImmPred:$i)>; -def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i), - (A4_combineri IntRegs:$r, s8ExtPred:$i)>, - Requires<[HasV4T]>; - -def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r), - (A4_combineir s8ExtPred:$i, IntRegs:$r)>, - Requires<[HasV4T]>; +def: Pat<(HexagonCOMBINE s32ImmPred:$i, IntRegs:$r), + (A4_combineir s32ImmPred:$i, IntRegs:$r)>; +} // A4_combineii: Set two small immediates. 
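
A4_combineri and A4_combineir above, and A4_combineii below, all perform the same packing with different operand kinds; in C++ the operation is:

#include <cstdint>
// combine(hi, lo): build a 64-bit register pair, first operand in the
// high word, second in the low word.
uint64_t combine(uint32_t hi, uint32_t lo) {
  return (static_cast<uint64_t>(hi) << 32) | lo;
}
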
let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in @@ -323,6 +341,12 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6), let Inst{4-0} = Rdd; } +// The complexity of the combine with two immediates should be greater than +// the complexity of a combine involving a register. +let AddedComplexity = 75 in +def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6), + (A4_combineii imm:$s8, imm:$u6)>; + //===----------------------------------------------------------------------===// // ALU32/PERM - //===----------------------------------------------------------------------===// @@ -330,24 +354,182 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6), //===----------------------------------------------------------------------===// // LD + //===----------------------------------------------------------------------===// + +def Zext64: OutPatFrag<(ops node:$Rs), + (i64 (A4_combineir 0, (i32 $Rs)))>; +def Sext64: OutPatFrag<(ops node:$Rs), + (i64 (A2_sxtw (i32 $Rs)))>; + +// Patterns to generate indexed loads with different forms of the address: +// - frameindex, +// - base + offset, +// - base (without offset). +multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, + PatLeaf ImmPred, InstHexagon MI> { + def: Pat<(VT (Load AddrFI:$fi)), + (VT (ValueMod (MI AddrFI:$fi, 0)))>; + def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))), + (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; + def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))), + (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; + def: Pat<(VT (Load (i32 IntRegs:$Rs))), + (VT (ValueMod (MI IntRegs:$Rs, 0)))>; +} + +defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>; +defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>; +defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>; +defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>; + +// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs). +def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>; + //===----------------------------------------------------------------------===// // Template class for load instructions with Absolute set addressing mode. 
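
Absolute-set addressing, "Rd = memw(Re = ##addr)", performs the load and, in the same instruction, leaves the absolute address in a second register for later accesses. An equivalent two-step C++ sketch (names illustrative):

#include <cstdint>
int32_t load_abs_set(uintptr_t addr, uintptr_t &base_out) {
  base_out = addr;                                  // $dst2 = #$addr
  return *reinterpret_cast<const int32_t *>(addr);  // $dst1 = memw(#$addr)
}
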
//===----------------------------------------------------------------------===// -let isExtended = 1, opExtendable = 2, hasSideEffects = 0, -validSubTargets = HasV4SubT, addrMode = AbsoluteSet in -class T_LD_abs_set<string mnemonic, RegisterClass RC>: - LDInst2<(outs RC:$dst1, IntRegs:$dst2), - (ins u0AlwaysExt:$addr), - "$dst1 = "#mnemonic#"($dst2=##$addr)", - []>, - Requires<[HasV4T]>; +let isExtended = 1, opExtendable = 2, opExtentBits = 6, addrMode = AbsoluteSet, + hasSideEffects = 0 in +class T_LD_abs_set<string mnemonic, RegisterClass RC, bits<4>MajOp>: + LDInst<(outs RC:$dst1, IntRegs:$dst2), + (ins u6Ext:$addr), + "$dst1 = "#mnemonic#"($dst2 = #$addr)", + []> { + bits<7> name; + bits<5> dst1; + bits<5> dst2; + bits<6> addr; + + let IClass = 0b1001; + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{13-12} = 0b01; + let Inst{4-0} = dst1; + let Inst{20-16} = dst2; + let Inst{11-8} = addr{5-2}; + let Inst{6-5} = addr{1-0}; +} + +let accessSize = ByteAccess, hasNewValue = 1 in { + def L4_loadrb_ap : T_LD_abs_set <"memb", IntRegs, 0b1000>; + def L4_loadrub_ap : T_LD_abs_set <"memub", IntRegs, 0b1001>; +} + +let accessSize = HalfWordAccess, hasNewValue = 1 in { + def L4_loadrh_ap : T_LD_abs_set <"memh", IntRegs, 0b1010>; + def L4_loadruh_ap : T_LD_abs_set <"memuh", IntRegs, 0b1011>; + def L4_loadbsw2_ap : T_LD_abs_set <"membh", IntRegs, 0b0001>; + def L4_loadbzw2_ap : T_LD_abs_set <"memubh", IntRegs, 0b0011>; +} + +let accessSize = WordAccess, hasNewValue = 1 in + def L4_loadri_ap : T_LD_abs_set <"memw", IntRegs, 0b1100>; + +let accessSize = WordAccess in { + def L4_loadbzw4_ap : T_LD_abs_set <"memubh", DoubleRegs, 0b0101>; + def L4_loadbsw4_ap : T_LD_abs_set <"membh", DoubleRegs, 0b0111>; +} -def LDrid_abs_set_V4 : T_LD_abs_set <"memd", DoubleRegs>; -def LDrib_abs_set_V4 : T_LD_abs_set <"memb", IntRegs>; -def LDriub_abs_set_V4 : T_LD_abs_set <"memub", IntRegs>; -def LDrih_abs_set_V4 : T_LD_abs_set <"memh", IntRegs>; -def LDriw_abs_set_V4 : T_LD_abs_set <"memw", IntRegs>; -def LDriuh_abs_set_V4 : T_LD_abs_set <"memuh", IntRegs>; +let accessSize = DoubleWordAccess in +def L4_loadrd_ap : T_LD_abs_set <"memd", DoubleRegs, 0b1110>; + +let accessSize = ByteAccess in + def L4_loadalignb_ap : T_LD_abs_set <"memb_fifo", DoubleRegs, 0b0100>; + +let accessSize = HalfWordAccess in +def L4_loadalignh_ap : T_LD_abs_set <"memh_fifo", DoubleRegs, 0b0010>; + +// Load - Indirect with long offset +let InputType = "imm", addrMode = BaseLongOffset, isExtended = 1, +opExtentBits = 6, opExtendable = 3 in +class T_LoadAbsReg <string mnemonic, string CextOp, RegisterClass RC, + bits<4> MajOp> + : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3), + "$dst = "#mnemonic#"($src1<<#$src2 + #$src3)", + [] >, ImmRegShl { + bits<5> dst; + bits<5> src1; + bits<2> src2; + bits<6> src3; + let CextOpcode = CextOp; + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + + let IClass = 0b1001; + let Inst{27-25} = 0b110; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2{1}; + let Inst{12} = 0b1; + let Inst{11-8} = src3{5-2}; + let Inst{7} = src2{0}; + let Inst{6-5} = src3{1-0}; + let Inst{4-0} = dst; + } + +let accessSize = ByteAccess in { + def L4_loadrb_ur : T_LoadAbsReg<"memb", "LDrib", IntRegs, 0b1000>; + def L4_loadrub_ur : T_LoadAbsReg<"memub", "LDriub", IntRegs, 0b1001>; + def L4_loadalignb_ur : T_LoadAbsReg<"memb_fifo", "LDrib_fifo", + DoubleRegs, 0b0100>; +} + +let accessSize = HalfWordAccess in { + def L4_loadrh_ur : T_LoadAbsReg<"memh", 
"LDrih", IntRegs, 0b1010>; + def L4_loadruh_ur : T_LoadAbsReg<"memuh", "LDriuh", IntRegs, 0b1011>; + def L4_loadbsw2_ur : T_LoadAbsReg<"membh", "LDribh2", IntRegs, 0b0001>; + def L4_loadbzw2_ur : T_LoadAbsReg<"memubh", "LDriubh2", IntRegs, 0b0011>; + def L4_loadalignh_ur : T_LoadAbsReg<"memh_fifo", "LDrih_fifo", + DoubleRegs, 0b0010>; +} + +let accessSize = WordAccess in { + def L4_loadri_ur : T_LoadAbsReg<"memw", "LDriw", IntRegs, 0b1100>; + def L4_loadbsw4_ur : T_LoadAbsReg<"membh", "LDribh4", DoubleRegs, 0b0111>; + def L4_loadbzw4_ur : T_LoadAbsReg<"memubh", "LDriubh4", DoubleRegs, 0b0101>; +} + +let accessSize = DoubleWordAccess in +def L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>; + + +multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> { + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3)))), + (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tglobaladdr:$src2)))), + (MI IntRegs:$src1, 0, tglobaladdr:$src2)>; + + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tconstpool:$src3)))), + (MI IntRegs:$src1, u2ImmPred:$src2, tconstpool:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tconstpool:$src2)))), + (MI IntRegs:$src1, 0, tconstpool:$src2)>; + + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tjumptable:$src3)))), + (MI IntRegs:$src1, u2ImmPred:$src2, tjumptable:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tjumptable:$src2)))), + (MI IntRegs:$src1, 0, tjumptable:$src2)>; +} + +let AddedComplexity = 60 in { +defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>; +defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>; +defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>; + +defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>; +defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>; +defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>; + +defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>; +defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>; +} //===----------------------------------------------------------------------===// // Template classes for the non-predicated load instructions with @@ -430,217 +612,234 @@ multiclass ld_idxd_shl <string mnemonic, string CextOp, RegisterClass RC, } } -let hasNewValue = 1, accessSize = ByteAccess, isCodeGenOnly = 0 in { +let hasNewValue = 1, accessSize = ByteAccess in { defm loadrb : ld_idxd_shl<"memb", "LDrib", IntRegs, 0b000>; defm loadrub : ld_idxd_shl<"memub", "LDriub", IntRegs, 0b001>; } -let hasNewValue = 1, accessSize = HalfWordAccess, isCodeGenOnly = 0 in { +let hasNewValue = 1, accessSize = HalfWordAccess in { defm loadrh : ld_idxd_shl<"memh", "LDrih", IntRegs, 0b010>; defm loadruh : ld_idxd_shl<"memuh", "LDriuh", IntRegs, 0b011>; } -let hasNewValue = 1, accessSize = WordAccess, isCodeGenOnly = 0 in +let hasNewValue = 1, accessSize = WordAccess in defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>; -let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in +let accessSize = DoubleWordAccess in defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>; // 'def pats' for load instructions with base + register offset and non-zero // immediate value. Immediate value is used to left-shift the second // register operand. 
+class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+  : Pat<(VT (Load (add (i32 IntRegs:$Rs),
+                       (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2))))),
+        (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
+
let AddedComplexity = 40 in {
-def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
-                                 (shl IntRegs:$src2, u2ImmPred:$offset)))),
-           (L4_loadrb_rr IntRegs:$src1,
-                         IntRegs:$src2, u2ImmPred:$offset)>,
-           Requires<[HasV4T]>;
-
-def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
-                                 (shl IntRegs:$src2, u2ImmPred:$offset)))),
-           (L4_loadrub_rr IntRegs:$src1,
-                          IntRegs:$src2, u2ImmPred:$offset)>,
-           Requires<[HasV4T]>;
-
-def : Pat <(i32 (extloadi8 (add IntRegs:$src1,
-                                (shl IntRegs:$src2, u2ImmPred:$offset)))),
-           (L4_loadrub_rr IntRegs:$src1,
-                          IntRegs:$src2, u2ImmPred:$offset)>,
-           Requires<[HasV4T]>;
-
-def : Pat <(i32 (sextloadi16 (add IntRegs:$src1,
-                                  (shl IntRegs:$src2, u2ImmPred:$offset)))),
-           (L4_loadrh_rr IntRegs:$src1,
-                         IntRegs:$src2, u2ImmPred:$offset)>,
-           Requires<[HasV4T]>;
-
-def : Pat <(i32 (zextloadi16 (add IntRegs:$src1,
-                                  (shl IntRegs:$src2, u2ImmPred:$offset)))),
-           (L4_loadruh_rr IntRegs:$src1,
-                          IntRegs:$src2, u2ImmPred:$offset)>,
-           Requires<[HasV4T]>;
-
-def : Pat <(i32 (extloadi16 (add IntRegs:$src1,
-                                 (shl IntRegs:$src2, u2ImmPred:$offset)))),
-           (L4_loadruh_rr IntRegs:$src1,
-                          IntRegs:$src2, u2ImmPred:$offset)>,
-           Requires<[HasV4T]>;
-
-def : Pat <(i32 (load (add IntRegs:$src1,
-                           (shl IntRegs:$src2, u2ImmPred:$offset)))),
-           (L4_loadri_rr IntRegs:$src1,
-                         IntRegs:$src2, u2ImmPred:$offset)>,
-           Requires<[HasV4T]>;
-
-def : Pat <(i64 (load (add IntRegs:$src1,
-                           (shl IntRegs:$src2, u2ImmPred:$offset)))),
-           (L4_loadrd_rr IntRegs:$src1,
-                         IntRegs:$src2, u2ImmPred:$offset)>,
-           Requires<[HasV4T]>;
+  def: Loadxs_pat<extloadi8,   i32, L4_loadrub_rr>;
+  def: Loadxs_pat<zextloadi8,  i32, L4_loadrub_rr>;
+  def: Loadxs_pat<sextloadi8,  i32, L4_loadrb_rr>;
+  def: Loadxs_pat<extloadi16,  i32, L4_loadruh_rr>;
+  def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>;
+  def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>;
+  def: Loadxs_pat<load,        i32, L4_loadri_rr>;
+  def: Loadxs_pat<load,        i64, L4_loadrd_rr>;
}
-
// 'def pats' for load instructions with base + register offset and
// zero immediate value.
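Editor's note: the pattern families around this hunk differ only in how the effective address is formed and in their AddedComplexity (40 for the shifted form above, 20 for the zero-shift family below); instruction selection prefers higher-complexity patterns when more than one matches, so a shifted address keeps its shift folded into the load. A minimal C++ sketch of the two address shapes; the function names and test values are illustrative only and not part of the imported source:

#include <cassert>
#include <cstdint>

// T_LoadAbsReg above: EA = (Rs << #u2) + ##global, where "global" stands in
// for the extended constant (tglobaladdr/tconstpool/tjumptable).
uint32_t ea_long_offset(uint32_t rs, unsigned u2, uint32_t global) {
  return (rs << u2) + global;
}

// Loadxs_pat / Loadxs_simple_pat: EA = Rs + (Rt << #u2); the simple family
// below is the same shape with a shift amount of 0.
uint32_t ea_shifted(uint32_t rs, uint32_t rt, unsigned u2) {
  return rs + (rt << u2);
}

int main() {
  assert(ea_long_offset(3, 2, 0x1000) == 0x100c); // word table indexed by 3
  assert(ea_shifted(8, 4, 0) == 12);  // matches the zero-offset patterns
  assert(ea_shifted(8, 4, 1) == 16);  // matches Loadxs_pat (complexity 40)
  return 0;
}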
-let AddedComplexity = 10 in { -def : Pat <(i64 (load (add IntRegs:$src1, IntRegs:$src2))), - (L4_loadrd_rr IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; +class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI> + : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; + +let AddedComplexity = 20 in { + def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxs_simple_pat<load, i32, L4_loadri_rr>; + def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>; +} -def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, IntRegs:$src2))), - (L4_loadrb_rr IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; +// zext i1->i64 +def: Pat<(i64 (zext (i1 PredRegs:$src1))), + (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>; + +// zext i32->i64 +def: Pat<(i64 (zext (i32 IntRegs:$src1))), + (Zext64 IntRegs:$src1)>; -def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, IntRegs:$src2))), - (L4_loadrub_rr IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; +//===----------------------------------------------------------------------===// +// LD - +//===----------------------------------------------------------------------===// -def : Pat <(i32 (extloadi8 (add IntRegs:$src1, IntRegs:$src2))), - (L4_loadrub_rr IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; +//===----------------------------------------------------------------------===// +// ST + +//===----------------------------------------------------------------------===// +/// +//===----------------------------------------------------------------------===// +// Template class for store instructions with Absolute set addressing mode. 
+//===----------------------------------------------------------------------===// +let isExtended = 1, opExtendable = 1, opExtentBits = 6, + addrMode = AbsoluteSet, isNVStorable = 1 in +class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC, + bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> + : STInst<(outs IntRegs:$dst), + (ins u6Ext:$addr, RC:$src), + mnemonic#"($dst = #$addr) = $src"#!if(isHalf, ".h","")>, NewValueRel { + bits<5> dst; + bits<6> addr; + bits<5> src; + let accessSize = AccessSz; + let BaseOpcode = BaseOp#"_AbsSet"; -def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, IntRegs:$src2))), - (L4_loadrh_rr IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; + let IClass = 0b1010; -def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, IntRegs:$src2))), - (L4_loadruh_rr IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = dst; + let Inst{13} = 0b0; + let Inst{12-8} = src; + let Inst{7} = 0b1; + let Inst{5-0} = addr; + } -def : Pat <(i32 (extloadi16 (add IntRegs:$src1, IntRegs:$src2))), - (L4_loadruh_rr IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; +def S4_storerb_ap : T_ST_absset <"memb", "STrib", IntRegs, 0b000, ByteAccess>; +def S4_storerh_ap : T_ST_absset <"memh", "STrih", IntRegs, 0b010, + HalfWordAccess>; +def S4_storeri_ap : T_ST_absset <"memw", "STriw", IntRegs, 0b100, WordAccess>; -def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))), - (L4_loadri_rr IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; +let isNVStorable = 0 in { + def S4_storerf_ap : T_ST_absset <"memh", "STrif", IntRegs, + 0b011, HalfWordAccess, 1>; + def S4_storerd_ap : T_ST_absset <"memd", "STrid", DoubleRegs, + 0b110, DoubleWordAccess>; } -// zext i1->i64 -def : Pat <(i64 (zext (i1 PredRegs:$src1))), - (i64 (A4_combineir 0, (C2_muxii (i1 PredRegs:$src1), 1, 0)))>, - Requires<[HasV4T]>; +let opExtendable = 1, isNewValue = 1, isNVStore = 1, opNewValue = 2, +isExtended = 1, opExtentBits= 6 in +class T_ST_absset_nv <string mnemonic, string BaseOp, bits<2> MajOp, + MemAccessSize AccessSz > + : NVInst <(outs IntRegs:$dst), + (ins u6Ext:$addr, IntRegs:$src), + mnemonic#"($dst = #$addr) = $src.new">, NewValueRel { + bits<5> dst; + bits<6> addr; + bits<3> src; + let accessSize = AccessSz; + let BaseOpcode = BaseOp#"_AbsSet"; -// zext i32->i64 -def : Pat <(i64 (zext (i32 IntRegs:$src1))), - (i64 (A4_combineir 0, (i32 IntRegs:$src1)))>, - Requires<[HasV4T]>; -// zext i8->i64 -def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), - (i64 (A4_combineir 0, (L2_loadrub_io AddrFI:$src1, 0)))>, - Requires<[HasV4T]>; - -let AddedComplexity = 20 in -def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1), - s11_0ExtPred:$offset))), - (i64 (A4_combineir 0, (L2_loadrub_io IntRegs:$src1, - s11_0ExtPred:$offset)))>, - Requires<[HasV4T]>; + let IClass = 0b1010; -// zext i1->i64 -def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)), - (i64 (A4_combineir 0, (L2_loadrub_io AddrFI:$src1, 0)))>, - Requires<[HasV4T]>; - -let AddedComplexity = 20 in -def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1), - s11_0ExtPred:$offset))), - (i64 (A4_combineir 0, (L2_loadrub_io IntRegs:$src1, - s11_0ExtPred:$offset)))>, - Requires<[HasV4T]>; - -// zext i16->i64 -def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), - (i64 (A4_combineir 0, (L2_loadruh_io AddrFI:$src1, 0)))>, - Requires<[HasV4T]>; - -let AddedComplexity = 20 in -def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1), - s11_1ExtPred:$offset))), - (i64 (A4_combineir 0, (L2_loadruh_io 
IntRegs:$src1, - s11_1ExtPred:$offset)))>, - Requires<[HasV4T]>; - -// anyext i16->i64 -def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), - (i64 (A4_combineir 0, (L2_loadrh_io AddrFI:$src1, 0)))>, - Requires<[HasV4T]>; - -let AddedComplexity = 20 in -def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1), - s11_1ExtPred:$offset))), - (i64 (A4_combineir 0, (L2_loadrh_io IntRegs:$src1, - s11_1ExtPred:$offset)))>, - Requires<[HasV4T]>; + let Inst{27-21} = 0b1011101; + let Inst{20-16} = dst; + let Inst{13-11} = 0b000; + let Inst{12-11} = MajOp; + let Inst{10-8} = src; + let Inst{7} = 0b1; + let Inst{5-0} = addr; + } -// zext i32->i64 -def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), - (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>, - Requires<[HasV4T]>; +let mayStore = 1, addrMode = AbsoluteSet in { + def S4_storerbnew_ap : T_ST_absset_nv <"memb", "STrib", 0b00, ByteAccess>; + def S4_storerhnew_ap : T_ST_absset_nv <"memh", "STrih", 0b01, HalfWordAccess>; + def S4_storerinew_ap : T_ST_absset_nv <"memw", "STriw", 0b10, WordAccess>; +} -let AddedComplexity = 100 in -def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), - (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1, - s11_2ExtPred:$offset)))>, - Requires<[HasV4T]>; +let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm", +addrMode = BaseLongOffset, AddedComplexity = 40 in +class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC, + bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> + : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, RC:$src4), + mnemonic#"($src1<<#$src2 + #$src3) = $src4"#!if(isHalf, ".h",""), + []>, ImmRegShl, NewValueRel { + + bits<5> src1; + bits<2> src2; + bits<6> src3; + bits<5> src4; -// anyext i32->i64 -def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), - (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>, - Requires<[HasV4T]>; + let accessSize = AccessSz; + let CextOpcode = CextOp; + let BaseOpcode = CextOp#"_shl"; + let IClass = 0b1010; -let AddedComplexity = 100 in -def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), - (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1, - s11_2ExtPred:$offset)))>, - Requires<[HasV4T]>; + let Inst{27-24} =0b1101; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2{1}; + let Inst{12-8} = src4; + let Inst{7} = 0b1; + let Inst{6} = src2{0}; + let Inst{5-0} = src3; +} +def S4_storerb_ur : T_StoreAbsReg <"memb", "STrib", IntRegs, 0b000, ByteAccess>; +def S4_storerh_ur : T_StoreAbsReg <"memh", "STrih", IntRegs, 0b010, + HalfWordAccess>; +def S4_storerf_ur : T_StoreAbsReg <"memh", "STrif", IntRegs, 0b011, + HalfWordAccess, 1>; +def S4_storeri_ur : T_StoreAbsReg <"memw", "STriw", IntRegs, 0b100, WordAccess>; +def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110, + DoubleWordAccess>; +let AddedComplexity = 40 in +multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT, + PatFrag stOp> { + def : Pat<(stOp (VT RC:$src4), + (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), + u32ImmPred:$src3)), + (MI IntRegs:$src1, u2ImmPred:$src2, u32ImmPred:$src3, RC:$src4)>; -//===----------------------------------------------------------------------===// -// LD - -//===----------------------------------------------------------------------===// + def : Pat<(stOp (VT RC:$src4), + (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3))), + (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; 
-//===----------------------------------------------------------------------===// -// ST + -//===----------------------------------------------------------------------===// -/// -//===----------------------------------------------------------------------===// -// Template class for store instructions with Absolute set addressing mode. -//===----------------------------------------------------------------------===// -let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT, -addrMode = AbsoluteSet in -class T_ST_abs_set<string mnemonic, RegisterClass RC>: - STInst2<(outs IntRegs:$dst1), - (ins RC:$src1, u0AlwaysExt:$src2), - mnemonic#"($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; + def : Pat<(stOp (VT RC:$src4), + (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))), + (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; +} -def STrid_abs_set_V4 : T_ST_abs_set <"memd", DoubleRegs>; -def STrib_abs_set_V4 : T_ST_abs_set <"memb", IntRegs>; -def STrih_abs_set_V4 : T_ST_abs_set <"memh", IntRegs>; -def STriw_abs_set_V4 : T_ST_abs_set <"memw", IntRegs>; +defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>; +defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>; +defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>; +defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>; + +let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset, + opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in +class T_StoreAbsRegNV <string mnemonic, string CextOp, bits<2> MajOp, + MemAccessSize AccessSz> + : NVInst <(outs ), + (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, IntRegs:$src4), + mnemonic#"($src1<<#$src2 + #$src3) = $src4.new">, NewValueRel { + bits<5> src1; + bits<2> src2; + bits<6> src3; + bits<3> src4; + + let CextOpcode = CextOp; + let BaseOpcode = CextOp#"_shl"; + let IClass = 0b1010; + + let Inst{27-21} = 0b1101101; + let Inst{12-11} = 0b00; + let Inst{7} = 0b1; + let Inst{20-16} = src1; + let Inst{13} = src2{1}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src4; + let Inst{6} = src2{0}; + let Inst{5-0} = src3; + } + +def S4_storerbnew_ur : T_StoreAbsRegNV <"memb", "STrib", 0b00, ByteAccess>; +def S4_storerhnew_ur : T_StoreAbsRegNV <"memh", "STrih", 0b01, HalfWordAccess>; +def S4_storerinew_ur : T_StoreAbsRegNV <"memw", "STriw", 0b10, WordAccess>; //===----------------------------------------------------------------------===// // Template classes for the non-predicated store instructions with @@ -804,8 +1003,7 @@ multiclass ST_Idxd_shl_nv <string mnemonic, string CextOp, RegisterClass RC, } } -let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0, - isCodeGenOnly = 0 in { +let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0 in { let accessSize = ByteAccess in defm storerb: ST_Idxd_shl<"memb", "STrib", IntRegs, 0b000>, ST_Idxd_shl_nv<"memb", "STrib", IntRegs, 0b00>; @@ -825,83 +1023,18 @@ let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0, defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>; } -let Predicates = [HasV4T], AddedComplexity = 10 in { -def : Pat<(truncstorei8 (i32 IntRegs:$src4), - (add IntRegs:$src1, (shl IntRegs:$src2, - u2ImmPred:$src3))), - (S4_storerb_rr IntRegs:$src1, IntRegs:$src2, - u2ImmPred:$src3, IntRegs:$src4)>; - -def : Pat<(truncstorei16 (i32 IntRegs:$src4), - (add IntRegs:$src1, (shl IntRegs:$src2, - u2ImmPred:$src3))), - (S4_storerh_rr IntRegs:$src1, IntRegs:$src2, - u2ImmPred:$src3, IntRegs:$src4)>; - -def : 
Pat<(store (i32 IntRegs:$src4), - (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))), - (S4_storeri_rr IntRegs:$src1, IntRegs:$src2, - u2ImmPred:$src3, IntRegs:$src4)>; - -def : Pat<(store (i64 DoubleRegs:$src4), - (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))), - (S4_storerd_rr IntRegs:$src1, IntRegs:$src2, - u2ImmPred:$src3, DoubleRegs:$src4)>; -} - -let isExtended = 1, opExtendable = 2 in -class T_ST_LongOff <string mnemonic, PatFrag stOp, RegisterClass RC, ValueType VT> : - STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, RC:$src4), - mnemonic#"($src1<<#$src2+##$src3) = $src4", - [(stOp (VT RC:$src4), - (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3))]>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 2, mayStore = 1, isNVStore = 1 in -class T_ST_LongOff_nv <string mnemonic> : - NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - mnemonic#"($src1<<#$src2+##$src3) = $src4.new", - []>, - Requires<[HasV4T]>; - -multiclass ST_LongOff <string mnemonic, string BaseOp, PatFrag stOp> { - let BaseOpcode = BaseOp#"_shl" in { - let isNVStorable = 1 in - def NAME#_V4 : T_ST_LongOff<mnemonic, stOp, IntRegs, i32>; - - def NAME#_nv_V4 : T_ST_LongOff_nv<mnemonic>; - } -} - -let AddedComplexity = 10, validSubTargets = HasV4SubT in { - def STrid_shl_V4 : T_ST_LongOff<"memd", store, DoubleRegs, i64>; - defm STrib_shl : ST_LongOff <"memb", "STrib", truncstorei8>, NewValueRel; - defm STrih_shl : ST_LongOff <"memh", "Strih", truncstorei16>, NewValueRel; - defm STriw_shl : ST_LongOff <"memw", "STriw", store>, NewValueRel; -} - -let AddedComplexity = 40 in -multiclass T_ST_LOff_Pats <InstHexagon I, RegisterClass RC, ValueType VT, - PatFrag stOp> { - def : Pat<(stOp (VT RC:$src4), - (add (shl IntRegs:$src1, u2ImmPred:$src2), - (NumUsesBelowThresCONST32 tglobaladdr:$src3))), - (I IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; +class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs), + (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2)))), + (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; - def : Pat<(stOp (VT RC:$src4), - (add IntRegs:$src1, - (NumUsesBelowThresCONST32 tglobaladdr:$src3))), - (I IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; +let AddedComplexity = 40 in { + def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>; + def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>; + def: Storexs_pat<store, I32, S4_storeri_rr>; + def: Storexs_pat<store, I64, S4_storerd_rr>; } -defm : T_ST_LOff_Pats<STrid_shl_V4, DoubleRegs, i64, store>; -defm : T_ST_LOff_Pats<STriw_shl_V4, IntRegs, i32, store>; -defm : T_ST_LOff_Pats<STrib_shl_V4, IntRegs, i32, truncstorei8>; -defm : T_ST_LOff_Pats<STrih_shl_V4, IntRegs, i32, truncstorei16>; - // memd(Rx++#s4:3)=Rtt // memd(Rx++#s4:3:circ(Mu))=Rtt // memd(Rx++I:circ(Mu))=Rtt @@ -1004,8 +1137,8 @@ multiclass ST_Imm <string mnemonic, string CextOp, Operand OffsetOp, } } -let hasSideEffects = 0, validSubTargets = HasV4SubT, addrMode = BaseImmOffset, - InputType = "imm", isCodeGenOnly = 0 in { +let hasSideEffects = 0, addrMode = BaseImmOffset, + InputType = "imm" in { let accessSize = ByteAccess in defm S4_storeirb : ST_Imm<"memb", "STrib", u6_0Imm, 0b00>; @@ -1016,22 +1149,49 @@ let hasSideEffects = 0, validSubTargets = HasV4SubT, addrMode = BaseImmOffset, defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>; } -let Predicates = [HasV4T], AddedComplexity = 10 in { -def: Pat<(truncstorei8 
s8ExtPred:$src3, (add IntRegs:$src1, u6_0ImmPred:$src2)), - (S4_storeirb_io IntRegs:$src1, u6_0ImmPred:$src2, s8ExtPred:$src3)>; +def IMM_BYTE : SDNodeXForm<imm, [{ + // -1 etc is represented as 255 etc + // assigning to a byte restores our desired signed value. + int8_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; + +def IMM_HALF : SDNodeXForm<imm, [{ + // -1 etc is represented as 65535 etc + // assigning to a short restores our desired signed value. + int16_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; + +def IMM_WORD : SDNodeXForm<imm, [{ + // -1 etc can be represented as 4294967295 etc + // Currently, it's not doing this. But some optimization + // might convert -1 to a large +ve number. + // assigning to a word restores our desired signed value. + int32_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; -def: Pat<(truncstorei16 s8ExtPred:$src3, (add IntRegs:$src1, - u6_1ImmPred:$src2)), - (S4_storeirh_io IntRegs:$src1, u6_1ImmPred:$src2, s8ExtPred:$src3)>; +def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>; +def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>; +def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>; -def: Pat<(store s8ExtPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2)), - (S4_storeiri_io IntRegs:$src1, u6_2ImmPred:$src2, s8ExtPred:$src3)>; +let AddedComplexity = 40 in { + // Not using frameindex patterns for these stores, because the offset + // is not extendable. This could cause problems during removing the frame + // indices, since the offset with respect to R29/R30 may not fit in the + // u6 field. + def: Storexm_add_pat<truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte, + S4_storeirb_io>; + def: Storexm_add_pat<truncstorei16, s32ImmPred, u6_1ImmPred, ToImmHalf, + S4_storeirh_io>; + def: Storexm_add_pat<store, s32ImmPred, u6_2ImmPred, ToImmWord, + S4_storeiri_io>; } -let AddedComplexity = 6 in -def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)), - (S4_storeirb_io IntRegs:$src1, 0, s8ExtPred:$src2)>, - Requires<[HasV4T]>; +def: Storexm_simple_pat<truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>; +def: Storexm_simple_pat<truncstorei16, s32ImmPred, ToImmHalf, S4_storeirh_io>; +def: Storexm_simple_pat<store, s32ImmPred, ToImmWord, S4_storeiri_io>; // memb(Rx++#s4:0:circ(Mu))=Rt // memb(Rx++I:circ(Mu))=Rt @@ -1039,16 +1199,10 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)), // memb(Rx++Mu:brev)=Rt // memb(gp+#u16:0)=Rt - // Store halfword. // TODO: needs to be implemented // memh(Re=#U6)=Rt.H // memh(Rs+#s11:1)=Rt.H -let AddedComplexity = 6 in -def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)), - (S4_storeirh_io IntRegs:$src1, 0, s8ExtPred:$src2)>, - Requires<[HasV4T]>; - // memh(Rs+Ru<<#u2)=Rt.H // TODO: needs to be implemented. @@ -1065,7 +1219,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)), // if ([!]Pv[.new]) memh(#u6)=Rt.H // if ([!]Pv[.new]) memh(#u6)=Rt - // if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H // TODO: needs to be implemented. @@ -1075,20 +1228,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)), // Store word. // memw(Re=#U6)=Rt // TODO: Needs to be implemented. 
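Editor's note, stepping back to the IMM_BYTE/IMM_HALF/IMM_WORD transforms defined above: each one reduces to a signed narrowing of the DAG constant, mirroring the getSExtValue casts in their C++ bodies, so a value such as 255 that the DAG produced for a byte store is re-read as the signed byte -1 and fits the signed immediate field of S4_storeirb_io. A standalone C++ restatement (illustrative function names, two's-complement narrowing assumed):

#include <cassert>
#include <cstdint>

int32_t immByte(int64_t v) { return (int8_t)v;  } // what IMM_BYTE computes
int32_t immHalf(int64_t v) { return (int16_t)v; } // what IMM_HALF computes

int main() {
  assert(immByte(255) == -1);     // 0xFF re-read as a signed byte
  assert(immByte(127) == 127);    // in-range values pass through
  assert(immHalf(65535) == -1);   // 0xFFFF re-read as a signed halfword
  return 0;
}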
- -// Store predicate: -let hasSideEffects = 0 in -def STriw_pred_V4 : STInst2<(outs), - (ins MEMri:$addr, PredRegs:$src1), - "Error; should not emit", - []>, - Requires<[HasV4T]>; - -let AddedComplexity = 6 in -def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)), - (S4_storeiri_io IntRegs:$src1, 0, s8ExtPred:$src2)>, - Requires<[HasV4T]>; - // memw(Rx++#s4:2)=Rt // memw(Rx++#s4:2:circ(Mu))=Rt // memw(Rx++I:circ(Mu))=Rt @@ -1203,7 +1342,7 @@ multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC, } } -let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in { +let addrMode = BaseImmOffset, InputType = "imm" in { let accessSize = ByteAccess in defm storerb: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext, u6_0Ext, 0b00>, AddrModeRel; @@ -1218,11 +1357,45 @@ let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in { } //===----------------------------------------------------------------------===// +// Post increment loads with register offset. +//===----------------------------------------------------------------------===// + +let hasNewValue = 1 in +def L2_loadbsw2_pr : T_load_pr <"membh", IntRegs, 0b0001, HalfWordAccess>; + +def L2_loadbsw4_pr : T_load_pr <"membh", DoubleRegs, 0b0111, WordAccess>; + +let hasSideEffects = 0, addrMode = PostInc in +class T_loadalign_pr <string mnemonic, bits<4> MajOp, MemAccessSize AccessSz> + : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$_dst_), + (ins DoubleRegs:$src1, IntRegs:$src2, ModRegs:$src3), + "$dst = "#mnemonic#"($src2++$src3)", [], + "$src1 = $dst, $src2 = $_dst_"> { + bits<5> dst; + bits<5> src2; + bits<1> src3; + + let accessSize = AccessSz; + let IClass = 0b1001; + + let Inst{27-25} = 0b110; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = src3; + let Inst{12} = 0b0; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +def L2_loadalignb_pr : T_loadalign_pr <"memb_fifo", 0b0100, ByteAccess>; +def L2_loadalignh_pr : T_loadalign_pr <"memh_fifo", 0b0010, HalfWordAccess>; + +//===----------------------------------------------------------------------===// // Template class for non-predicated post increment .new stores // mem[bhwd](Rx++#s4:[0123])=Nt.new //===----------------------------------------------------------------------===// -let isPredicable = 1, hasSideEffects = 0, validSubTargets = HasV4SubT, - addrMode = PostInc, isNVStore = 1, isNewValue = 1, opNewValue = 3 in +let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1, + isNewValue = 1, opNewValue = 3 in class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp > : NVInstPI_V4 <(outs IntRegs:$_dst_), (ins IntRegs:$src1, ImmOp:$offset, IntRegs:$src2), @@ -1254,8 +1427,8 @@ class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp > // Template class for predicated post increment .new stores // if([!]Pv[.new]) mem[bhwd](Rx++#s4:[0123])=Nt.new //===----------------------------------------------------------------------===// -let isPredicated = 1, hasSideEffects = 0, validSubTargets = HasV4SubT, - addrMode = PostInc, isNVStore = 1, isNewValue = 1, opNewValue = 4 in +let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1, + isNewValue = 1, opNewValue = 4 in class T_StorePI_nv_pred <string mnemonic, Operand ImmOp, bits<2> MajOp, bit isPredNot, bit isPredNew > : NVInstPI_V4 <(outs IntRegs:$_dst_), @@ -1310,13 +1483,13 @@ multiclass ST_PostInc_nv<string mnemonic, string BaseOp, Operand ImmOp, } } -let accessSize = ByteAccess, isCodeGenOnly = 0 in +let accessSize = ByteAccess in 
defm storerbnew: ST_PostInc_nv <"memb", "STrib", s4_0Imm, 0b00>; -let accessSize = HalfWordAccess, isCodeGenOnly = 0 in +let accessSize = HalfWordAccess in defm storerhnew: ST_PostInc_nv <"memh", "STrih", s4_1Imm, 0b01>; -let accessSize = WordAccess, isCodeGenOnly = 0 in +let accessSize = WordAccess in defm storerinew: ST_PostInc_nv <"memw", "STriw", s4_2Imm, 0b10>; //===----------------------------------------------------------------------===// @@ -1343,15 +1516,12 @@ class T_StorePI_RegNV <string mnemonic, bits<2> MajOp, MemAccessSize AccessSz> let Inst{7} = 0b0; } -let isCodeGenOnly = 0 in { def S2_storerbnew_pr : T_StorePI_RegNV<"memb", 0b00, ByteAccess>; def S2_storerhnew_pr : T_StorePI_RegNV<"memh", 0b01, HalfWordAccess>; def S2_storerinew_pr : T_StorePI_RegNV<"memw", 0b10, WordAccess>; -} // memb(Rx++#s4:0:circ(Mu))=Nt.new // memb(Rx++I:circ(Mu))=Nt.new -// memb(Rx++Mu)=Nt.new // memb(Rx++Mu:brev)=Nt.new // memh(Rx++#s4:1:circ(Mu))=Nt.new // memh(Rx++I:circ(Mu))=Nt.new @@ -1401,7 +1571,7 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum, let RegOp = !if(!eq(NvOpNum, 0), src2, src1); let IClass = 0b0010; - let Inst{26} = 0b0; + let Inst{27-26} = 0b00; let Inst{25-23} = majOp; let Inst{22} = isNegCond; let Inst{18-16} = Ns; @@ -1415,9 +1585,9 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum, multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum, bit isNegCond> { // Branch not taken: - def _nt_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>; + def _nt: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>; // Branch taken: - def _t_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>; + def _t : NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>; } // NvOpNum = 0 -> First Operand is a new-value Register @@ -1426,8 +1596,8 @@ multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum, multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp, bit NvOpNum> { let BaseOpcode = BaseOp#_NVJ in { - defm _t_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond - defm _f_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond + defm _t_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond + defm _f_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond } } @@ -1438,13 +1608,12 @@ multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp, // if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2 let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, - Defs = [PC], hasSideEffects = 0, validSubTargets = HasV4SubT, - isCodeGenOnly = 0 in { - defm CMPEQrr : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel; - defm CMPGTrr : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel; - defm CMPGTUrr : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel; - defm CMPLTrr : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel; - defm CMPLTUrr : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel; + Defs = [PC], hasSideEffects = 0 in { + defm J4_cmpeq : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel; + defm J4_cmpgt : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel; + defm J4_cmpgtu : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel; + defm J4_cmplt : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel; + defm J4_cmpltu : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel; } //===----------------------------------------------------------------------===// @@ -1482,15 +1651,15 @@ class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond, multiclass NVJri_cond<string mnemonic, 
bits<3> majOp, bit isNegCond> { // Branch not taken: - def _nt_V4: NVJri_template<mnemonic, majOp, isNegCond, 0>; + def _nt: NVJri_template<mnemonic, majOp, isNegCond, 0>; // Branch taken: - def _t_V4: NVJri_template<mnemonic, majOp, isNegCond, 1>; + def _t : NVJri_template<mnemonic, majOp, isNegCond, 1>; } multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> { let BaseOpcode = BaseOp#_NVJri in { - defm _t_Jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond - defm _f_Jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond + defm _t_jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond + defm _f_jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond } } @@ -1499,11 +1668,10 @@ multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> { // if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2 let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, - Defs = [PC], hasSideEffects = 0, validSubTargets = HasV4SubT, - isCodeGenOnly = 0 in { - defm CMPEQri : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel; - defm CMPGTri : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel; - defm CMPGTUri : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel; + Defs = [PC], hasSideEffects = 0 in { + defm J4_cmpeqi : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel; + defm J4_cmpgti : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel; + defm J4_cmpgtui : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel; } //===----------------------------------------------------------------------===// @@ -1540,16 +1708,16 @@ class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal, multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal, bit isNegCond> { // Branch not taken: - def _nt_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>; + def _nt: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>; // Branch taken: - def _t_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>; + def _t : NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>; } multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp, string ImmVal> { let BaseOpcode = BaseOp#_NVJ_ConstImm in { - defm _t_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True - defm _f_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False + defm _t_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True + defm _f_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False } } @@ -1558,14 +1726,14 @@ multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp, // if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2 let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1, - Defs = [PC], hasSideEffects = 0, isCodeGenOnly = 0 in { - defm TSTBIT0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel; - defm CMPEQn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel; - defm CMPGTn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel; + Defs = [PC], hasSideEffects = 0 in { + defm J4_tstbit0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel; + defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel; + defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel; } // J4_hintjumpr: Hint indirect conditional jump. 
-let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0, isCodeGenOnly = 0 in +let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in def J4_hintjumpr: JRInst < (outs), (ins IntRegs:$Rs), @@ -1586,8 +1754,7 @@ def J4_hintjumpr: JRInst < // PC-relative add let hasNewValue = 1, isExtendable = 1, opExtendable = 1, - isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0, - Uses = [PC], validSubTargets = HasV4SubT in + isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0, Uses = [PC] in def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6Ext:$u6), "$Rd = add(pc, #$u6)", [], "", CR_tc_2_SLOT3 > { bits<5> Rd; @@ -1625,7 +1792,6 @@ class T_LOGICAL_3OP<string MnOp1, string MnOp2, bits<2> OpBits, bit IsNeg> let Inst{1-0} = Pd; } -let isCodeGenOnly = 0 in { def C4_and_and : T_LOGICAL_3OP<"and", "and", 0b00, 0>; def C4_and_or : T_LOGICAL_3OP<"and", "or", 0b01, 0>; def C4_or_and : T_LOGICAL_3OP<"or", "and", 0b10, 0>; @@ -1634,7 +1800,69 @@ def C4_and_andn : T_LOGICAL_3OP<"and", "and", 0b00, 1>; def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>; def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>; def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>; -} + +// op(Ps, op(Pt, Pu)) +class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI> + : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))), + (MI I1:$Ps, I1:$Pt, I1:$Pu)>; + +// op(Ps, op(Pt, ~Pu)) +class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI> + : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))), + (MI I1:$Ps, I1:$Pt, I1:$Pu)>; + +def: LogLog_pat<and, and, C4_and_and>; +def: LogLog_pat<and, or, C4_and_or>; +def: LogLog_pat<or, and, C4_or_and>; +def: LogLog_pat<or, or, C4_or_or>; + +def: LogLogNot_pat<and, and, C4_and_andn>; +def: LogLogNot_pat<and, or, C4_and_orn>; +def: LogLogNot_pat<or, and, C4_or_andn>; +def: LogLogNot_pat<or, or, C4_or_orn>; + +//===----------------------------------------------------------------------===// +// PIC: Support for PIC compilations. 
The patterns and SD nodes defined +// below are needed to support code generation for PIC +//===----------------------------------------------------------------------===// + +def SDT_HexagonPICAdd + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def SDT_HexagonGOTAdd + : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +def SDT_HexagonGOTAddInternal : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; +def SDT_HexagonGOTAddInternalJT : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; +def SDT_HexagonGOTAddInternalBA : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; + +def Hexagonpic_add : SDNode<"HexagonISD::PIC_ADD", SDT_HexagonPICAdd>; +def Hexagonat_got : SDNode<"HexagonISD::AT_GOT", SDT_HexagonGOTAdd>; +def Hexagongat_pcrel : SDNode<"HexagonISD::AT_PCREL", + SDT_HexagonGOTAddInternal>; +def Hexagongat_pcrel_jt : SDNode<"HexagonISD::AT_PCREL", + SDT_HexagonGOTAddInternalJT>; +def Hexagongat_pcrel_ba : SDNode<"HexagonISD::AT_PCREL", + SDT_HexagonGOTAddInternalBA>; + +// PIC: Map from a block address computation to a PC-relative add +def: Pat<(Hexagongat_pcrel_ba tblockaddress:$src1), + (C4_addipc u32ImmPred:$src1)>; + +// PIC: Map from the computation to generate a GOT pointer to a PC-relative add +def: Pat<(Hexagonpic_add texternalsym:$src1), + (C4_addipc u32ImmPred:$src1)>; + +// PIC: Map from a jump table address computation to a PC-relative add +def: Pat<(Hexagongat_pcrel_jt tjumptable:$src1), + (C4_addipc u32ImmPred:$src1)>; + +// PIC: Map from a GOT-relative symbol reference to a load +def: Pat<(Hexagonat_got (i32 IntRegs:$src1), tglobaladdr:$src2), + (L2_loadri_io IntRegs:$src1, s30_2ImmPred:$src2)>; + +// PIC: Map from a static symbol reference to a PC-relative add +def: Pat<(Hexagongat_pcrel tglobaladdr:$src1), + (C4_addipc u32ImmPred:$src1)>; //===----------------------------------------------------------------------===// // CR - @@ -1645,12 +1873,15 @@ def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>; //===----------------------------------------------------------------------===// // Logical with-not instructions. -let validSubTargets = HasV4SubT, isCodeGenOnly = 0 in { - def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>; - def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>; -} +def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>; +def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>; + +def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), + (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>; +def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), + (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>; -let hasNewValue = 1, hasSideEffects = 0, isCodeGenOnly = 0 in +let hasNewValue = 1, hasSideEffects = 0 in def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { bits<5> Rd; @@ -1663,15 +1894,16 @@ def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), let Inst{12-8} = Rt; let Inst{4-0} = Rd; } + // Add and accumulate. 
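Editor's note: before the definition that follows, a one-line model of what the compound add computes, read directly off its selection pattern (add Rs, (add Ru, #s6)); the names are illustrative only:

#include <cassert>
#include <cstdint>

// S4_addaddi folds two adds into one ALU64 slot: Rd = Rs + (Ru + #s6).
int32_t addaddi(int32_t rs, int32_t ru, int32_t s6) { return rs + (ru + s6); }

int main() {
  assert(addaddi(10, 20, -5) == 25);
  return 0;
}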
// Rd=add(Rs,add(Ru,#s6)) let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6, - opExtendable = 3, isCodeGenOnly = 0 in + opExtendable = 3 in def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6), "$Rd = add($Rs, add($Ru, #$s6))" , [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), - (add (i32 IntRegs:$Ru), s6_16ExtPred:$s6)))], + (add (i32 IntRegs:$Ru), s16_16ImmPred:$s6)))], "", ALU64_tc_2_SLOT23> { bits<5> Rd; bits<5> Rs; @@ -1690,7 +1922,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), } let isExtentSigned = 1, hasSideEffects = 0, hasNewValue = 1, isExtendable = 1, - opExtentBits = 6, opExtendable = 2, isCodeGenOnly = 0 in + opExtentBits = 6, opExtendable = 2 in def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Ext:$s6, IntRegs:$Ru), "$Rd = add($Rs, sub(#$s6, $Ru))", @@ -1710,31 +1942,64 @@ def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd), let Inst{7-5} = s6{2-0}; let Inst{4-0} = Ru; } - + +// Rd=add(Rs,sub(#s6,Ru)) +def: Pat<(add (i32 IntRegs:$src1), (sub s32ImmPred:$src2, + (i32 IntRegs:$src3))), + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; + +// Rd=sub(add(Rs,#s6),Ru) +def: Pat<(sub (add (i32 IntRegs:$src1), s32ImmPred:$src2), + (i32 IntRegs:$src3)), + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; + +// Rd=add(sub(Rs,Ru),#s6) +def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)), + (s32ImmPred:$src2)), + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; + + +// Add or subtract doublewords with carry. +//TODO: +// Rdd=add(Rss,Rtt,Px):carry +//TODO: +// Rdd=sub(Rss,Rtt,Px):carry + // Extract bitfield // Rdd=extract(Rss,#u6,#U6) // Rdd=extract(Rss,Rtt) // Rd=extract(Rs,Rtt) // Rd=extract(Rs,#u5,#U5) -let isCodeGenOnly = 0 in { def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>; def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6Imm>; -} -let hasNewValue = 1, isCodeGenOnly = 0 in { +let hasNewValue = 1 in { def S4_extract_rp : T_S3op_extract<"extract", 0b01>; def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5Imm>; } -let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in { +// Complex add/sub halfwords/words +let Defs = [USR_OVF] in { + def S4_vxaddsubh : T_S3op_64 < "vxaddsubh", 0b01, 0b100, 0, 1>; + def S4_vxaddsubw : T_S3op_64 < "vxaddsubw", 0b01, 0b000, 0, 1>; + def S4_vxsubaddh : T_S3op_64 < "vxsubaddh", 0b01, 0b110, 0, 1>; + def S4_vxsubaddw : T_S3op_64 < "vxsubaddw", 0b01, 0b010, 0, 1>; +} + +let Defs = [USR_OVF] in { + def S4_vxaddsubhr : T_S3op_64 < "vxaddsubh", 0b11, 0b000, 0, 1, 1, 1>; + def S4_vxsubaddhr : T_S3op_64 < "vxsubaddh", 0b11, 0b010, 0, 1, 1, 1>; +} + +let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF] in { def M4_mac_up_s1_sat: T_MType_acc_rr<"+= mpy", 0b011, 0b000, 0, [], 0, 1, 1>; def M4_nac_up_s1_sat: T_MType_acc_rr<"-= mpy", 0b011, 0b001, 0, [], 0, 1, 1>; } // Logical xor with xor accumulation. 
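Editor's note: the accumulating form defined next reads its destination register pair as an extra input (the "$dst2 = $Rxx" constraint). A quick C++ model of the computation implied by the Rxx^=xor(Rss,Rtt) syntax; names are illustrative:

#include <cassert>
#include <cstdint>

// M4_xor_xacc: the 64-bit destination accumulates via exclusive-or.
uint64_t xor_xacc(uint64_t rxx, uint64_t rss, uint64_t rtt) {
  return rxx ^ (rss ^ rtt);
}

int main() {
  assert(xor_xacc(0xF0, 0x0F, 0xFF) == 0x00); // 0xF0 ^ (0x0F ^ 0xFF)
  return 0;
}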
// Rxx^=xor(Rss,Rtt) -let hasSideEffects = 0, isCodeGenOnly = 0 in +let hasSideEffects = 0 in def M4_xor_xacc : SInst <(outs DoubleRegs:$Rxx), (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), @@ -1749,36 +2014,102 @@ def M4_xor_xacc let IClass = 0b1100; - let Inst{27-23} = 0b10101; + let Inst{27-22} = 0b101010; let Inst{20-16} = Rss; let Inst{12-8} = Rtt; + let Inst{7-5} = 0b000; let Inst{4-0} = Rxx; } - + +// Rotate and reduce bytes +// Rdd=vrcrotate(Rss,Rt,#u2) +let hasSideEffects = 0 in +def S4_vrcrotate + : SInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2), + "$Rdd = vrcrotate($Rss, $Rt, #$u2)", + [], "", S_3op_tc_3x_SLOT23> { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rt; + bits<2> u2; + + let IClass = 0b1100; + + let Inst{27-22} = 0b001111; + let Inst{20-16} = Rss; + let Inst{13} = u2{1}; + let Inst{12-8} = Rt; + let Inst{7-6} = 0b11; + let Inst{5} = u2{0}; + let Inst{4-0} = Rdd; + } + +// Rotate and reduce bytes with accumulation +// Rxx+=vrcrotate(Rss,Rt,#u2) +let hasSideEffects = 0 in +def S4_vrcrotate_acc + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2), + "$Rxx += vrcrotate($Rss, $Rt, #$u2)", [], + "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rt; + bits<2> u2; + + let IClass = 0b1100; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = Rss; + let Inst{13} = u2{1}; + let Inst{12-8} = Rt; + let Inst{5} = u2{0}; + let Inst{4-0} = Rxx; + } + +// Vector reduce conditional negate halfwords +let hasSideEffects = 0 in +def S2_vrcnegh + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt), + "$Rxx += vrcnegh($Rss, $Rt)", [], + "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-21} = 0b1011001; + let Inst{20-16} = Rss; + let Inst{13} = 0b1; + let Inst{12-8} = Rt; + let Inst{7-5} = 0b111; + let Inst{4-0} = Rxx; + } + // Split bitfield -let isCodeGenOnly = 0 in def A4_bitspliti : T_S2op_2_di <"bitsplit", 0b110, 0b100>; // Arithmetic/Convergent round -let isCodeGenOnly = 0 in def A4_cround_ri : T_S2op_2_ii <"cround", 0b111, 0b000>; -let isCodeGenOnly = 0 in def A4_round_ri : T_S2op_2_ii <"round", 0b111, 0b100>; -let Defs = [USR_OVF], isCodeGenOnly = 0 in +let Defs = [USR_OVF] in def A4_round_ri_sat : T_S2op_2_ii <"round", 0b111, 0b110, 1>; // Logical-logical words. // Compound or-and -- Rx=or(Ru,and(Rx,#s10)) let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10, - opExtendable = 3, isCodeGenOnly = 0 in + opExtendable = 3 in def S4_or_andix: ALU64Inst<(outs IntRegs:$Rx), (ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10), "$Rx = or($Ru, and($_src_, #$s10))" , [(set (i32 IntRegs:$Rx), - (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s10ExtPred:$s10)))] , + (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32ImmPred:$s10)))] , "$_src_ = $Rx", ALU64_tc_2_SLOT23> { bits<5> Rx; bits<5> Ru; @@ -1795,7 +2126,7 @@ def S4_or_andix: // Miscellaneous ALU64 instructions. 
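Editor's note, looking back at S4_or_andix just above before moving on: its selection pattern spells out the compound operation exactly, so it can be restated directly (illustrative names; Rx is both a source and the destination):

#include <cassert>
#include <cstdint>

// S4_or_andix: Rx = or(Ru, and(Rx, #s10)).
int32_t or_andix(int32_t ru, int32_t rx, int32_t s10) {
  return ru | (rx & s10);
}

int main() {
  assert(or_andix(0x100, 0xABCD, 0xFF) == 0x1CD);
  return 0;
}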
// -let hasNewValue = 1, hasSideEffects = 0, isCodeGenOnly = 0 in +let hasNewValue = 1, hasSideEffects = 0 in def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), "$Rd = modwrap($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { bits<5> Rd; @@ -1810,7 +2141,7 @@ def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), let Inst{4-0} = Rd; } -let hasSideEffects = 0, isCodeGenOnly = 0 in +let hasSideEffects = 0 in def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), "$Rd = bitsplit($Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> { @@ -1826,7 +2157,54 @@ def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd), let Inst{4-0} = Rd; } -let isCodeGenOnly = 0 in { +let hasSideEffects = 0 in +def dep_S2_packhl: ALU64Inst<(outs DoubleRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = packhl($Rs, $Rt):deprecated", [], "", ALU64_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0100; + let Inst{21} = 0b0; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +let hasNewValue = 1, hasSideEffects = 0 in +def dep_A2_addsat: ALU64Inst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = add($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0101100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7} = 0b0; + let Inst{4-0} = Rd; +} + +let hasNewValue = 1, hasSideEffects = 0 in +def dep_A2_subsat: ALU64Inst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = sub($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0101100; + let Inst{20-16} = Rt; + let Inst{12-8} = Rs; + let Inst{7} = 0b1; + let Inst{4-0} = Rd; +} + // Rx[&|]=xor(Rs,Rt) def M4_or_xor : T_MType_acc_rr < "|= xor", 0b110, 0b001, 0>; def M4_and_xor : T_MType_acc_rr < "&= xor", 0b010, 0b010, 0>; @@ -1849,7 +2227,24 @@ def M4_and_and : T_MType_acc_rr < "&= and", 0b010, 0b000, 0>; def M4_xor_andn : T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>; def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>; def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>; -} + +def: T_MType_acc_pat2 <M4_or_xor, xor, or>; +def: T_MType_acc_pat2 <M4_and_xor, xor, and>; +def: T_MType_acc_pat2 <M4_or_and, and, or>; +def: T_MType_acc_pat2 <M4_and_and, and, and>; +def: T_MType_acc_pat2 <M4_xor_and, and, xor>; +def: T_MType_acc_pat2 <M4_or_or, or, or>; +def: T_MType_acc_pat2 <M4_and_or, or, and>; +def: T_MType_acc_pat2 <M4_xor_or, or, xor>; + +class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp> + : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, + (not IntRegs:$src3)))), + (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>; + +def: T_MType_acc_pat3 <M4_or_andn, and, or>; +def: T_MType_acc_pat3 <M4_and_andn, and, and>; +def: T_MType_acc_pat3 <M4_xor_andn, and, xor>; // Compound or-or and or-and let isExtentSigned = 1, InputType = "imm", hasNewValue = 1, isExtendable = 1, @@ -1859,7 +2254,7 @@ class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode> (ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10), "$Rx |= "#mnemonic#"($Rs, #$s10)", [(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1), - (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10)))], + (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10)))], "$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel { bits<5> Rx; bits<5> Rs; @@ -1875,10 +2270,10 
@@ class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode> let Inst{4-0} = Rx; } -let CextOpcode = "ORr_ANDr", isCodeGenOnly = 0 in +let CextOpcode = "ORr_ANDr" in def S4_or_andi : T_CompOR <"and", 0b00, and>; -let CextOpcode = "ORr_ORr", isCodeGenOnly = 0 in +let CextOpcode = "ORr_ORr" in def S4_or_ori : T_CompOR <"or", 0b10, or>; // Modulo wrap @@ -1923,22 +2318,33 @@ def S4_or_ori : T_CompOR <"or", 0b10, or>; //===----------------------------------------------------------------------===// // Bit reverse -let isCodeGenOnly = 0 in def S2_brevp : T_S2op_3 <"brev", 0b11, 0b110>; // Bit count -let isCodeGenOnly = 0 in { def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>; def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>; def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>; -} -def: Pat<(i32 (trunc (cttz (i64 DoubleRegs:$Rss)))), - (S2_ct0p (i64 DoubleRegs:$Rss))>; -def: Pat<(i32 (trunc (cttz (not (i64 DoubleRegs:$Rss))))), - (S2_ct1p (i64 DoubleRegs:$Rss))>; +// Count trailing zeros: 64-bit. +def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; +def: Pat<(i32 (trunc (cttz_zero_undef I64:$Rss))), (S2_ct0p I64:$Rss)>; + +// Count trailing ones: 64-bit. +def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; +def: Pat<(i32 (trunc (cttz_zero_undef (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; + +// Define leading/trailing patterns that require zero-extensions to 64 bits. +def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>; +def: Pat<(i64 (ctlz_zero_undef I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>; +def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>; +def: Pat<(i64 (cttz_zero_undef I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>; +def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>; +def: Pat<(i64 (ctlz_zero_undef (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>; +def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>; +def: Pat<(i64 (cttz_zero_undef (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>; -let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in + +let hasSideEffects = 0, hasNewValue = 1 in def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6), "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> { bits<5> Rs; @@ -1953,7 +2359,7 @@ def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6), let Inst{4-0} = Rd; } -let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in +let hasSideEffects = 0, hasNewValue = 1 in def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6), "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> { bits<5> Rs; @@ -1970,10 +2376,8 @@ def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6), // Bit test/set/clear -let isCodeGenOnly = 0 in { def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>; def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>; -} let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. def: Pat<(i1 (seteq (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), @@ -1993,11 +2397,9 @@ let AddedComplexity = 100 in def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; -let isCodeGenOnly = 0 in { def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>; def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>; def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>; -} // Do not increase complexity of these patterns. 
In the DAG, "cmp i8" may be // represented as a compare against "value & 0xFF", which is an exact match @@ -2022,14 +2424,13 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), // Rd=add(#u6,mpyi(Rs,#U6)) -- Multiply by immed and add immed. -let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1, - isCodeGenOnly = 0 in +let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in def M4_mpyri_addi : MInst<(outs IntRegs:$Rd), (ins u6Ext:$u6, IntRegs:$Rs, u6Imm:$U6), "$Rd = add(#$u6, mpyi($Rs, #$U6))" , [(set (i32 IntRegs:$Rd), (add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6), - u6ExtPred:$u6))] ,"",ALU64_tc_3x_SLOT23> { + u32ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> { bits<5> Rd; bits<6> u6; bits<5> Rs; @@ -2049,12 +2450,12 @@ def M4_mpyri_addi : MInst<(outs IntRegs:$Rd), // Rd=add(#u6,mpyi(Rs,Rt)) let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1, - isExtendable = 1, opExtentBits = 6, opExtendable = 1, isCodeGenOnly = 0 in + isExtendable = 1, opExtentBits = 6, opExtendable = 1 in def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd), (ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt), "$Rd = add(#$u6, mpyi($Rs, $Rt))" , [(set (i32 IntRegs:$Rd), - (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u6ExtPred:$u6))], + (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32ImmPred:$u6))], "", ALU64_tc_3x_SLOT23>, ImmRegRel { bits<5> Rd; bits<6> u6; @@ -2099,18 +2500,16 @@ class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins> let Inst{4-0} = src1; } -let isCodeGenOnly = 0 in def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred, (ins IntRegs:$src1, u6_2Imm:$src2, IntRegs:$src3)>; let isExtendable = 1, opExtentBits = 6, opExtendable = 3, - CextOpcode = "ADD_MPY", InputType = "imm", isCodeGenOnly = 0 in -def M4_mpyri_addr : T_AddMpy<0b1, u6ExtPred, + CextOpcode = "ADD_MPY", InputType = "imm" in +def M4_mpyri_addr : T_AddMpy<0b1, u32ImmPred, (ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel; // Rx=add(Ru,mpyi(Rx,Rs)) -let validSubTargets = HasV4SubT, CextOpcode = "ADD_MPY", InputType = "reg", - hasNewValue = 1, isCodeGenOnly = 0 in +let CextOpcode = "ADD_MPY", InputType = "reg", hasNewValue = 1 in def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx), (ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs), "$Rx = add($Ru, mpyi($_src_, $Rs))", @@ -2129,51 +2528,101 @@ def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx), let Inst{20-16} = Rs; } -// Rd=add(##,mpyi(Rs,#U6)) -def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3), - (HexagonCONST32 tglobaladdr:$src1)), - (i32 (M4_mpyri_addi tglobaladdr:$src1, IntRegs:$src2, - u6ImmPred:$src3))>; -// Rd=add(##,mpyi(Rs,Rt)) -def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), - (HexagonCONST32 tglobaladdr:$src1)), - (i32 (M4_mpyrr_addi tglobaladdr:$src1, IntRegs:$src2, - IntRegs:$src3))>; +// Vector reduce multiply word by signed half (32x16) +//Rdd=vrmpyweh(Rss,Rtt)[:<<1] +def M4_vrmpyeh_s0 : T_M2_vmpy<"vrmpyweh", 0b010, 0b100, 0, 0, 0>; +def M4_vrmpyeh_s1 : T_M2_vmpy<"vrmpyweh", 0b110, 0b100, 1, 0, 0>; -// Polynomial multiply words -// Rdd=pmpyw(Rs,Rt) -// Rxx^=pmpyw(Rs,Rt) +//Rdd=vrmpywoh(Rss,Rtt)[:<<1] +def M4_vrmpyoh_s0 : T_M2_vmpy<"vrmpywoh", 0b001, 0b010, 0, 0, 0>; +def M4_vrmpyoh_s1 : T_M2_vmpy<"vrmpywoh", 0b101, 0b010, 1, 0, 0>; -// Vector reduce multiply word by signed half (32x16) -// Rdd=vrmpyweh(Rss,Rtt)[:<<1] -// Rdd=vrmpywoh(Rss,Rtt)[:<<1] -// Rxx+=vrmpyweh(Rss,Rtt)[:<<1] -// Rxx+=vrmpywoh(Rss,Rtt)[:<<1] - -// Multiply and use upper result -// Rd=mpy(Rs,Rt.H):<<1:sat -// Rd=mpy(Rs,Rt.L):<<1:sat -// 
Rd=mpy(Rs,Rt):<<1 -// Rd=mpy(Rs,Rt):<<1:sat -// Rd=mpysu(Rs,Rt) -// Rx+=mpy(Rs,Rt):<<1:sat -// Rx-=mpy(Rs,Rt):<<1:sat - -// Vector multiply bytes -// Rdd=vmpybsu(Rs,Rt) -// Rdd=vmpybu(Rs,Rt) -// Rxx+=vmpybsu(Rs,Rt) -// Rxx+=vmpybu(Rs,Rt) +//Rdd+=vrmpyweh(Rss,Rtt)[:<<1] +def M4_vrmpyeh_acc_s0: T_M2_vmpy_acc<"vrmpyweh", 0b001, 0b110, 0, 0>; +def M4_vrmpyeh_acc_s1: T_M2_vmpy_acc<"vrmpyweh", 0b101, 0b110, 1, 0>; + +//Rdd=vrmpywoh(Rss,Rtt)[:<<1] +def M4_vrmpyoh_acc_s0: T_M2_vmpy_acc<"vrmpywoh", 0b011, 0b110, 0, 0>; +def M4_vrmpyoh_acc_s1: T_M2_vmpy_acc<"vrmpywoh", 0b111, 0b110, 1, 0>; + +// Vector multiply halfwords, signed by unsigned +// Rdd=vmpyhsu(Rs,Rt)[:<<]:sat +def M2_vmpy2su_s0 : T_XTYPE_mpy64 < "vmpyhsu", 0b000, 0b111, 1, 0, 0>; +def M2_vmpy2su_s1 : T_XTYPE_mpy64 < "vmpyhsu", 0b100, 0b111, 1, 1, 0>; + +// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat +def M2_vmac2su_s0 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b011, 0b101, 1, 0, 0>; +def M2_vmac2su_s1 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b111, 0b101, 1, 1, 0>; // Vector polynomial multiply halfwords // Rdd=vpmpyh(Rs,Rt) +def M4_vpmpyh : T_XTYPE_mpy64 < "vpmpyh", 0b110, 0b111, 0, 0, 0>; + // Rxx^=vpmpyh(Rs,Rt) +def M4_vpmpyh_acc : T_XTYPE_mpy64_acc < "vpmpyh", "^", 0b101, 0b111, 0, 0, 0>; + +// Polynomial multiply words +// Rdd=pmpyw(Rs,Rt) +def M4_pmpyw : T_XTYPE_mpy64 < "pmpyw", 0b010, 0b111, 0, 0, 0>; + +// Rxx^=pmpyw(Rs,Rt) +def M4_pmpyw_acc : T_XTYPE_mpy64_acc < "pmpyw", "^", 0b001, 0b111, 0, 0, 0>; //===----------------------------------------------------------------------===// // XTYPE/MPY - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// ALU64/Vector compare +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Template class for vector compare +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0 in +class T_vcmpImm <string Str, bits<2> cmpOp, bits<2> minOp, Operand ImmOprnd> + : ALU64_rr <(outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, ImmOprnd:$Imm), + "$Pd = "#Str#"($Rss, #$Imm)", + [], "", ALU64_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rss; + bits<32> Imm; + bits<8> ImmBits; + let ImmBits{6-0} = Imm{6-0}; + let ImmBits{7} = !if (!eq(cmpOp,0b10), 0b0, Imm{7}); // 0 for vcmp[bhw].gtu + + let IClass = 0b1101; + + let Inst{27-24} = 0b1100; + let Inst{22-21} = cmpOp; + let Inst{20-16} = Rss; + let Inst{12-5} = ImmBits; + let Inst{4-3} = minOp; + let Inst{1-0} = Pd; + } + +// Vector compare bytes +def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>; +def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>; + +let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in +def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.gt", 0b1000>; + +def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8Imm>; +def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8Imm>; +def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7Imm>; + +// Vector compare halfwords +def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8Imm>; +def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8Imm>; +def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7Imm>; + +// Vector compare words +def A4_vcmpweqi : T_vcmpImm <"vcmpw.eq", 0b00, 0b10, s8Imm>; +def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8Imm>; +def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7Imm>; 
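An aside on the T_vcmpImm template above: it packs an 8-bit field at Inst{12-5} but forces bit 7 to zero when cmpOp is 0b10, since the vcmp[bhw].gtu forms carry an unsigned u7 immediate while the other forms carry a full 8 bits. A minimal standalone C++ sketch of that packing; the function name is illustrative, not part of LLVM:

#include <cassert>
#include <cstdint>

// Mirrors T_vcmpImm's ImmBits computation: keep Imm{6-0}, and keep
// Imm{7} only when the compare is not .gtu (cmpOp != 0b10).
uint8_t packVCmpImmBits(uint8_t cmpOp, int32_t imm) {
  uint8_t bits = static_cast<uint8_t>(imm) & 0x7f;
  if (cmpOp != 0b10)            // 0b10 encodes the unsigned .gtu forms
    bits |= static_cast<uint8_t>(imm) & 0x80;
  return bits;                  // emitted at Inst{12-5}
}

int main() {
  assert(packVCmpImmBits(0b01, -1) == 0xff);   // .gt keeps all 8 bits
  assert(packVCmpImmBits(0b10, 0x7f) == 0x7f); // .gtu passes a u7 through
  return 0;
}
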
//===----------------------------------------------------------------------===// // XTYPE/SHIFT + @@ -2184,13 +2633,13 @@ def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), // Rx=and(#u8,asl(Rx,#U5)) Rx=and(#u8,lsr(Rx,#U5)) // Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5)) let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, - hasNewValue = 1, opNewValue = 0, validSubTargets = HasV4SubT in + hasNewValue = 1, opNewValue = 0 in class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh, bit asl_lsr, bits<2> MajOp, InstrItinClass Itin> : MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5), "$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))", [(set (i32 IntRegs:$Rd), - (Op (Sh I32:$Rx, u5ImmPred:$U5), u8ExtPred:$u8))], + (Op (Sh I32:$Rx, u5ImmPred:$U5), u32ImmPred:$u8))], "$Rd = $Rx", Itin> { bits<5> Rd; @@ -2216,29 +2665,48 @@ multiclass T_ShiftOperate<string mnemonic, SDNode Op, bits<2> MajOp, def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", Op, srl, 1, MajOp, Itin>; } -let AddedComplexity = 200, isCodeGenOnly = 0 in { +let AddedComplexity = 200 in { defm S4_addi : T_ShiftOperate<"add", add, 0b10, ALU64_tc_2_SLOT23>; defm S4_andi : T_ShiftOperate<"and", and, 0b00, ALU64_tc_2_SLOT23>; } -let AddedComplexity = 30, isCodeGenOnly = 0 in +let AddedComplexity = 30 in defm S4_ori : T_ShiftOperate<"or", or, 0b01, ALU64_tc_1_SLOT23>; -let isCodeGenOnly = 0 in defm S4_subi : T_ShiftOperate<"sub", sub, 0b11, ALU64_tc_1_SLOT23>; +let AddedComplexity = 200 in { + def: Pat<(add addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)), + (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; + def: Pat<(add addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)), + (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; + def: Pat<(sub addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)), + (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; + def: Pat<(sub addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)), + (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; +} + +// Vector conditional negate +// Rdd=vcnegh(Rss,Rt) +let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in +def S2_vcnegh : T_S3op_shiftVect < "vcnegh", 0b11, 0b01>; // Rd=[cround|round](Rs,Rt) -let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23, isCodeGenOnly = 0 in { +let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in { def A4_cround_rr : T_S3op_3 < "cround", IntRegs, 0b11, 0b00>; def A4_round_rr : T_S3op_3 < "round", IntRegs, 0b11, 0b10>; } // Rd=round(Rs,Rt):sat -let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23, - isCodeGenOnly = 0 in +let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in def A4_round_rr_sat : T_S3op_3 < "round", IntRegs, 0b11, 0b11, 1>; +// Rd=[cmpyiwh|cmpyrwh](Rss,Rt):<<1:rnd:sat +let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in { + def M4_cmpyi_wh : T_S3op_8<"cmpyiwh", 0b100, 1, 1, 1>; + def M4_cmpyr_wh : T_S3op_8<"cmpyrwh", 0b110, 1, 1, 1>; +} + // Rdd=[add|sub](Rss,Rtt,Px):carry let isPredicateLate = 1, hasSideEffects = 0 in class T_S3op_carry <string mnemonic, bits<3> MajOp> @@ -2261,13 +2729,51 @@ class T_S3op_carry <string mnemonic, bits<3> MajOp> let Inst{4-0} = Rdd; } -let isCodeGenOnly = 0 in { def A4_addp_c : T_S3op_carry < "add", 0b110 >; def A4_subp_c : T_S3op_carry < "sub", 0b111 >; -} + +let Itinerary = S_3op_tc_3_SLOT23, hasSideEffects = 0 in +class T_S3op_6 <string mnemonic, bits<3> MinOp, bit isUnsigned> + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, 
IntRegs:$Ru), + "$Rxx = "#mnemonic#"($Rss, $Ru)" , + [] , "$dst2 = $Rxx"> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Ru; + + let IClass = 0b1100; + + let Inst{27-21} = 0b1011001; + let Inst{20-16} = Rss; + let Inst{13} = isUnsigned; + let Inst{12-8} = Rxx; + let Inst{7-5} = MinOp; + let Inst{4-0} = Ru; + } + +// Vector reduce maximum halfwords +// Rxx=vrmax[u]h(Rss,Ru) +def A4_vrmaxh : T_S3op_6 < "vrmaxh", 0b001, 0>; +def A4_vrmaxuh : T_S3op_6 < "vrmaxuh", 0b001, 1>; + +// Vector reduce maximum words +// Rxx=vrmax[u]w(Rss,Ru) +def A4_vrmaxw : T_S3op_6 < "vrmaxw", 0b010, 0>; +def A4_vrmaxuw : T_S3op_6 < "vrmaxuw", 0b010, 1>; + +// Vector reduce minimum halfwords +// Rxx=vrmin[u]h(Rss,Ru) +def A4_vrminh : T_S3op_6 < "vrminh", 0b101, 0>; +def A4_vrminuh : T_S3op_6 < "vrminuh", 0b101, 1>; + +// Vector reduce minimum words +// Rxx=vrmin[u]w(Rss,Ru) +def A4_vrminw : T_S3op_6 < "vrminw", 0b110, 0>; +def A4_vrminuw : T_S3op_6 < "vrminuw", 0b110, 1>; // Shift an immediate left by register amount. -let hasNewValue = 1, hasSideEffects = 0, isCodeGenOnly = 0 in +let hasNewValue = 1, hasSideEffects = 0 in def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt), "$Rd = lsl(#$s6, $Rt)" , [(set (i32 IntRegs:$Rd), (shl s6ImmPred:$s6, @@ -2299,7 +2805,7 @@ def MEMOPIMM : SDNodeXForm<imm, [{ // Call the transformation function XformM5ToU5Imm to get the negative // immediate's positive counterpart. int32_t imm = N->getSExtValue(); - return XformM5ToU5Imm(imm); + return XformM5ToU5Imm(imm, SDLoc(N)); }]>; def MEMOPIMM_HALF : SDNodeXForm<imm, [{ @@ -2308,7 +2814,7 @@ def MEMOPIMM_HALF : SDNodeXForm<imm, [{ // Call the transformation function XformM5ToU5Imm to get the negative // immediate's positive counterpart. int16_t imm = N->getSExtValue(); - return XformM5ToU5Imm(imm); + return XformM5ToU5Imm(imm, SDLoc(N)); }]>; def MEMOPIMM_BYTE : SDNodeXForm<imm, [{ @@ -2317,14 +2823,14 @@ def MEMOPIMM_BYTE : SDNodeXForm<imm, [{ // Call the transformation function XformM5ToU5Imm to get the negative // immediate's positive counterpart. int8_t imm = N->getSExtValue(); - return XformM5ToU5Imm(imm); + return XformM5ToU5Imm(imm, SDLoc(N)); }]>; def SETMEMIMM : SDNodeXForm<imm, [{ // Return the bit position we will set [0-31]. // As an SDNode. int32_t imm = N->getSExtValue(); - return XformMskToBitPosU5Imm(imm); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); }]>; def CLRMEMIMM : SDNodeXForm<imm, [{ @@ -2332,14 +2838,14 @@ def CLRMEMIMM : SDNodeXForm<imm, [{ // As an SDNode. // we bit negate the value first int32_t imm = ~(N->getSExtValue()); - return XformMskToBitPosU5Imm(imm); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); }]>; def SETMEMIMM_SHORT : SDNodeXForm<imm, [{ // Return the bit position we will set [0-15]. // As an SDNode. int16_t imm = N->getSExtValue(); - return XformMskToBitPosU4Imm(imm); + return XformMskToBitPosU4Imm(imm, SDLoc(N)); }]>; def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{ @@ -2347,14 +2853,14 @@ def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{ // As an SDNode. // we bit negate the value first int16_t imm = ~(N->getSExtValue()); - return XformMskToBitPosU4Imm(imm); + return XformMskToBitPosU4Imm(imm, SDLoc(N)); }]>; def SETMEMIMM_BYTE : SDNodeXForm<imm, [{ // Return the bit position we will set [0-7]. // As an SDNode. int8_t imm = N->getSExtValue(); - return XformMskToBitPosU3Imm(imm); + return XformMskToBitPosU3Imm(imm, SDLoc(N)); }]>; def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{ @@ -2362,7 +2868,7 @@ def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{ // As an SDNode. 
// we bit negate the value first int8_t imm = ~(N->getSExtValue()); - return XformMskToBitPosU3Imm(imm); + return XformMskToBitPosU3Imm(imm, SDLoc(N)); }]>; //===----------------------------------------------------------------------===// @@ -2450,15 +2956,14 @@ multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> { } // Define MemOp instructions. -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, - validSubTargets =HasV4SubT in { - let opExtentBits = 6, accessSize = ByteAccess, isCodeGenOnly = 0 in +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in { + let opExtentBits = 6, accessSize = ByteAccess in defm memopb_io : MemOp_base <"memb", 0b00, u6_0Ext>; - let opExtentBits = 7, accessSize = HalfWordAccess, isCodeGenOnly = 0 in + let opExtentBits = 7, accessSize = HalfWordAccess in defm memoph_io : MemOp_base <"memh", 0b01, u6_1Ext>; - let opExtentBits = 8, accessSize = WordAccess, isCodeGenOnly = 0 in + let opExtentBits = 8, accessSize = WordAccess in defm memopw_io : MemOp_base <"memw", 0b10, u6_2Ext>; } @@ -2469,43 +2974,43 @@ let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, // mem[bh](Rs+#u6) += #U5 //===----------------------------------------------------------------------===// -multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, +multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, InstHexagon MI, SDNode OpNode> { let AddedComplexity = 180 in - def : Pat < (stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend), - IntRegs:$addr), - (MI IntRegs:$addr, #0, u5ImmPred:$addend )>; + def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend), + IntRegs:$addr), + (MI IntRegs:$addr, 0, u5ImmPred:$addend)>; let AddedComplexity = 190 in - def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)), - u5ImmPred:$addend), - (add IntRegs:$base, ExtPred:$offset)), - (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>; + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ImmPred:$offset)), + u5ImmPred:$addend), + (add IntRegs:$base, ImmPred:$offset)), + (MI IntRegs:$base, ImmPred:$offset, u5ImmPred:$addend)>; } -multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, +multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, InstHexagon addMI, InstHexagon subMI> { - defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>; - defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>; + defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, addMI, add>; + defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, subMI, sub>; } multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { // Half Word - defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred, - L4_iadd_memoph_io, L4_isub_memoph_io>; + defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred, + L4_iadd_memoph_io, L4_isub_memoph_io>; // Byte - defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred, - L4_iadd_memopb_io, L4_isub_memopb_io>; + defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u32ImmPred, + L4_iadd_memopb_io, L4_isub_memopb_io>; } -let Predicates = [HasV4T, UseMEMOP] in { - defm : MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend - defm : MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend - defm : MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend +let Predicates = [UseMEMOP] in { + defm: MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend // Word - defm : MemOpi_u5ALUOp <load, 
store, u6_2ExtPred, L4_iadd_memopw_io, - L4_isub_memopw_io>; + defm: MemOpi_u5ALUOp <load, store, u30_2ImmPred, L4_iadd_memopw_io, + L4_isub_memopw_io>; } //===----------------------------------------------------------------------===// @@ -2515,38 +3020,37 @@ let Predicates = [HasV4T, UseMEMOP] in { // mem[bh](Rs+#u6) += #m5 //===----------------------------------------------------------------------===// -multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, - PatLeaf immPred, ComplexPattern addrPred, - SDNodeXForm xformFunc, InstHexagon MI> { +multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, + PatLeaf immPred, SDNodeXForm xformFunc, + InstHexagon MI> { let AddedComplexity = 190 in - def : Pat <(stOp (add (ldOp IntRegs:$addr), immPred:$subend), - IntRegs:$addr), - (MI IntRegs:$addr, #0, (xformFunc immPred:$subend) )>; + def: Pat<(stOp (add (ldOp IntRegs:$addr), immPred:$subend), IntRegs:$addr), + (MI IntRegs:$addr, 0, (xformFunc immPred:$subend))>; let AddedComplexity = 195 in - def : Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)), - immPred:$subend), - (add IntRegs:$base, extPred:$offset)), - (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>; + def: Pat<(stOp (add (ldOp (add IntRegs:$base, ImmPred:$offset)), + immPred:$subend), + (add IntRegs:$base, ImmPred:$offset)), + (MI IntRegs:$base, ImmPred:$offset, (xformFunc immPred:$subend))>; } multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { // Half Word - defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred, - ADDRriU6_1, MEMOPIMM_HALF, L4_isub_memoph_io>; + defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u31_1ImmPred, m5HImmPred, + MEMOPIMM_HALF, L4_isub_memoph_io>; // Byte - defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred, - ADDRriU6_0, MEMOPIMM_BYTE, L4_isub_memopb_io>; + defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u32ImmPred, m5BImmPred, + MEMOPIMM_BYTE, L4_isub_memopb_io>; } -let Predicates = [HasV4T, UseMEMOP] in { - defm : MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend - defm : MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend - defm : MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend +let Predicates = [UseMEMOP] in { + defm: MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend // Word - defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred, - ADDRriU6_2, MEMOPIMM, L4_isub_memopw_io>; + defm: MemOpi_m5Pats <load, store, u30_2ImmPred, m5ImmPred, + MEMOPIMM, L4_isub_memopw_io>; } //===----------------------------------------------------------------------===// @@ -2556,52 +3060,50 @@ let Predicates = [HasV4T, UseMEMOP] in { //===----------------------------------------------------------------------===// multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred, - PatLeaf extPred, ComplexPattern addrPred, - SDNodeXForm xformFunc, InstHexagon MI, SDNode OpNode> { + PatLeaf extPred, SDNodeXForm xformFunc, InstHexagon MI, + SDNode OpNode> { // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5) let AddedComplexity = 250 in - def : Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), - immPred:$bitend), - (add IntRegs:$base, extPred:$offset)), - (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>; + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + immPred:$bitend), + (add IntRegs:$base, extPred:$offset)), + (MI 
IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>; // mem[bhw](Rs+#0) = [clrbit|setbit](#U5) let AddedComplexity = 225 in - def : Pat <(stOp (OpNode (ldOp (addrPred IntRegs:$addr, extPred:$offset)), - immPred:$bitend), - (addrPred (i32 IntRegs:$addr), extPred:$offset)), - (MI IntRegs:$addr, extPred:$offset, (xformFunc immPred:$bitend))>; + def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), immPred:$bitend), IntRegs:$addr), + (MI IntRegs:$addr, 0, (xformFunc immPred:$bitend))>; } -multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { +multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf> { // Byte - clrbit - defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred, - ADDRriU6_0, CLRMEMIMM_BYTE, L4_iand_memopb_io, and>; + defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u32ImmPred, + CLRMEMIMM_BYTE, L4_iand_memopb_io, and>; // Byte - setbit - defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred, - ADDRriU6_0, SETMEMIMM_BYTE, L4_ior_memopb_io, or>; + defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u32ImmPred, + SETMEMIMM_BYTE, L4_ior_memopb_io, or>; // Half Word - clrbit - defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred, - ADDRriU6_1, CLRMEMIMM_SHORT, L4_iand_memoph_io, and>; + defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u31_1ImmPred, + CLRMEMIMM_SHORT, L4_iand_memoph_io, and>; // Half Word - setbit - defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred, - ADDRriU6_1, SETMEMIMM_SHORT, L4_ior_memoph_io, or>; + defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u31_1ImmPred, + SETMEMIMM_SHORT, L4_ior_memoph_io, or>; } -let Predicates = [HasV4T, UseMEMOP] in { +let Predicates = [UseMEMOP] in { // mem[bh](Rs+#0) = [clrbit|setbit](#U5) // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5) - defm : MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend - defm : MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend - defm : MemOpi_bitExtType<extloadi8, extloadi16>; // any extend + defm: MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOpi_bitExtType<extloadi8, extloadi16>; // any extend // memw(Rs+#0) = [clrbit|setbit](#U5) // memw(Rs+#u6:2) = [clrbit|setbit](#U5) - defm : MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, ADDRriU6_2, - CLRMEMIMM, L4_iand_memopw_io, and>; - defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2, - SETMEMIMM, L4_ior_memopw_io, or>; + defm: MemOpi_bitPats<load, store, Clr5ImmPred, u30_2ImmPred, CLRMEMIMM, + L4_iand_memopw_io, and>; + defm: MemOpi_bitPats<load, store, Set5ImmPred, u30_2ImmPred, SETMEMIMM, + L4_ior_memopw_io, or>; } //===----------------------------------------------------------------------===// @@ -2611,54 +3113,51 @@ let Predicates = [HasV4T, UseMEMOP] in { // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt //===----------------------------------------------------------------------===// -multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred, - PatLeaf extPred, InstHexagon MI, SDNode OpNode> { +multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, + InstHexagon MI, SDNode OpNode> { let AddedComplexity = 141 in // mem[bhw](Rs+#0) [+-&|]= Rt - def : Pat <(stOp (OpNode (ldOp (addrPred IntRegs:$addr, extPred:$offset)), - (i32 IntRegs:$addend)), - (addrPred (i32 IntRegs:$addr), extPred:$offset)), - (MI IntRegs:$addr, extPred:$offset, (i32 IntRegs:$addend) )>; + def: Pat<(stOp (OpNode 
(ldOp IntRegs:$addr), (i32 IntRegs:$addend)), + IntRegs:$addr), + (MI IntRegs:$addr, 0, (i32 IntRegs:$addend))>; // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt let AddedComplexity = 150 in - def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), - (i32 IntRegs:$orend)), - (add IntRegs:$base, extPred:$offset)), - (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend) )>; + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + (i32 IntRegs:$orend)), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend))>; } -multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, - ComplexPattern addrPred, PatLeaf extPred, +multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf extPred, InstHexagon addMI, InstHexagon subMI, - InstHexagon andMI, InstHexagon orMI > { - - defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, addMI, add>; - defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, subMI, sub>; - defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, andMI, and>; - defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, orMI, or>; + InstHexagon andMI, InstHexagon orMI> { + defm: MemOpr_Pats <ldOp, stOp, extPred, addMI, add>; + defm: MemOpr_Pats <ldOp, stOp, extPred, subMI, sub>; + defm: MemOpr_Pats <ldOp, stOp, extPred, andMI, and>; + defm: MemOpr_Pats <ldOp, stOp, extPred, orMI, or>; } multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { // Half Word - defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred, - L4_add_memoph_io, L4_sub_memoph_io, - L4_and_memoph_io, L4_or_memoph_io>; + defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred, + L4_add_memoph_io, L4_sub_memoph_io, + L4_and_memoph_io, L4_or_memoph_io>; // Byte - defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred, - L4_add_memopb_io, L4_sub_memopb_io, - L4_and_memopb_io, L4_or_memopb_io>; + defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u32ImmPred, + L4_add_memopb_io, L4_sub_memopb_io, + L4_and_memopb_io, L4_or_memopb_io>; } // Define 'def Pats' for MemOps with register addend. -let Predicates = [HasV4T, UseMEMOP] in { +let Predicates = [UseMEMOP] in { // Byte, Half Word - defm : MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend - defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend - defm : MemOPr_ExtType<extloadi8, extloadi16>; // any extend + defm: MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOPr_ExtType<extloadi8, extloadi16>; // any extend // Word - defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, L4_add_memopw_io, - L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io >; + defm: MemOPr_ALUOp <load, store, u30_2ImmPred, L4_add_memopw_io, + L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io>; } //===----------------------------------------------------------------------===// @@ -2676,311 +3175,41 @@ let Predicates = [HasV4T, UseMEMOP] in { // incorrect code for negative numbers. 
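A quick self-contained C++ illustration of that note: the byte compares below test the zero-extended low byte of Rs (the DAG form is seteq(and(Rs, 255), imm)), so a sign-extended negative immediate could never match, and extending the operand would miscompile.

#include <cassert>
#include <cstdint>

int main() {
  int32_t rs = -16;             // low byte of Rs is 0xf0
  assert((rs & 0xff) == 0xf0);  // what cmpb.eq actually compares
  assert((rs & 0xff) != -16);   // a sign-extended #-16 never matches
  return 0;
}
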
// Pd=cmpb.eq(Rs,#u8) -let isCompare = 1, isExtendable = 1, opExtendable = 2, hasSideEffects = 0, - validSubTargets = HasV4SubT in -class CMP_NOT_REG_IMM<string OpName, bits<2> op, Operand ImmOp, - list<dag> Pattern> - : ALU32Inst <(outs PredRegs:$dst), (ins IntRegs:$src1, ImmOp:$src2), - "$dst = !cmp."#OpName#"($src1, #$src2)", - Pattern, - "", ALU32_2op_tc_2early_SLOT0123> { - bits<2> dst; - bits<5> src1; - bits<10> src2; +// p=!cmp.eq(r1,#s10) +def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>; +def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>; +def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>; - let IClass = 0b0111; - let Inst{27-24} = 0b0101; - let Inst{23-22} = op; - let Inst{20-16} = src1; - let Inst{21} = !if (!eq(OpName, "gtu"), 0b0, src2{9}); - let Inst{13-5} = src2{8-0}; - let Inst{4-2} = 0b100; - let Inst{1-0} = dst; -} - -let opExtentBits = 10, isExtentSigned = 1 in { -def C4_cmpneqi : CMP_NOT_REG_IMM <"eq", 0b00, s10Ext, [(set (i1 PredRegs:$dst), - (setne (i32 IntRegs:$src1), s10ExtPred:$src2))]>; - -def C4_cmpltei : CMP_NOT_REG_IMM <"gt", 0b01, s10Ext, [(set (i1 PredRegs:$dst), - (not (setgt (i32 IntRegs:$src1), s10ExtPred:$src2)))]>; - -} -let opExtentBits = 9 in -def C4_cmplteui : CMP_NOT_REG_IMM <"gtu", 0b10, u9Ext, [(set (i1 PredRegs:$dst), - (not (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)))]>; - - - -// p=!cmp.eq(r1,r2) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = !cmp.eq($src1, $src2)", - [(set (i1 PredRegs:$dst), - (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>, - Requires<[HasV4T]>; - -// p=!cmp.gt(r1,r2) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = !cmp.gt($src1, $src2)", - [(set (i1 PredRegs:$dst), - (not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>, - Requires<[HasV4T]>; - - -// p=!cmp.gtu(r1,r2) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = !cmp.gtu($src1, $src2)", - [(set (i1 PredRegs:$dst), - (not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>, - Requires<[HasV4T]>; - -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, u8Imm:$src2), - "$dst = cmpb.eq($src1, #$src2)", - [(set (i1 PredRegs:$dst), - (seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>, - Requires<[HasV4T]>; - -def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)), - bb:$offset), - (J2_jumpf (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2), - bb:$offset)>, - Requires<[HasV4T]>; - -// Pd=cmpb.eq(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmpb.eq($src1, $src2)", - [(set (i1 PredRegs:$dst), - (seteq (and (xor (i32 IntRegs:$src1), - (i32 IntRegs:$src2)), 255), 0))]>, - Requires<[HasV4T]>; - -// Pd=cmpb.eq(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmpb.eq($src1, $src2)", - [(set (i1 PredRegs:$dst), - (seteq (shl (i32 IntRegs:$src1), (i32 24)), - (shl (i32 IntRegs:$src2), (i32 24))))]>, - Requires<[HasV4T]>; - -// Pd=cmpb.gt(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst), - (ins 
IntRegs:$src1, IntRegs:$src2), - "$dst = cmpb.gt($src1, $src2)", - [(set (i1 PredRegs:$dst), - (setgt (shl (i32 IntRegs:$src1), (i32 24)), - (shl (i32 IntRegs:$src2), (i32 24))))]>, - Requires<[HasV4T]>; - -// Pd=cmpb.gtu(Rs,#u7) -let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7, -isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", InputType = "imm" in -def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, u7Ext:$src2), - "$dst = cmpb.gtu($src1, #$src2)", - [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255), - u7ExtPred:$src2))]>, - Requires<[HasV4T]>, ImmRegRel; +def : T_CMP_pat <C4_cmpneqi, setne, s32ImmPred>; +def : T_CMP_pat <C4_cmpltei, setle, s32ImmPred>; +def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>; + +// rs <= rt -> !(rs > rt). +/* +def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>; +// (C4_cmpltei IntRegs:$src1, s32ImmPred:$src2)>; +*/ +// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1). +def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>; + +// rs != rt -> !(rs == rt). +def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C4_cmpneqi IntRegs:$src1, s32ImmPred:$src2)>; // SDNode for converting immediate C to C-1. def DEC_CONST_BYTE : SDNodeXForm<imm, [{ // Return the byte immediate const-1 as an SDNode. int32_t imm = N->getSExtValue(); - return XformU7ToU7M1Imm(imm); + return XformU7ToU7M1Imm(imm, SDLoc(N)); }]>; // For the sequence -// zext( seteq ( and(Rs, 255), u8)) -// Generate -// Pd=cmpb.eq(Rs, #u8) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)), - u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs), - (u8ExtPred:$u8))), - 1, 0))>, - Requires<[HasV4T]>; - -// For the sequence -// zext( setne ( and(Rs, 255), u8)) -// Generate -// Pd=cmpb.eq(Rs, #u8) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)), - u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs), - (u8ExtPred:$u8))), - 0, 1))>, - Requires<[HasV4T]>; - -// For the sequence -// zext( seteq (Rs, and(Rt, 255))) -// Generate -// Pd=cmpb.eq(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt), - (i32 (and (i32 IntRegs:$Rs), 255)))))), - (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 1, 0))>, - Requires<[HasV4T]>; - -// For the sequence -// zext( setne (Rs, and(Rt, 255))) -// Generate -// Pd=cmpb.eq(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt), - (i32 (and (i32 IntRegs:$Rs), 255)))))), - (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 0, 1))>, - Requires<[HasV4T]>; - -// For the sequence -// zext( setugt ( and(Rs, 255), u8)) -// Generate -// Pd=cmpb.gtu(Rs, #u8) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)), - u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs), - (u8ExtPred:$u8))), - 1, 0))>, - Requires<[HasV4T]>; - -// For the sequence -// zext( setugt ( and(Rs, 254), u8)) -// Generate -// Pd=cmpb.gtu(Rs, #u8) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)), - u8ExtPred:$u8)))), - 
(i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs), - (u8ExtPred:$u8))), - 1, 0))>, - Requires<[HasV4T]>; - -// For the sequence -// zext( setult ( Rs, Rt)) -// Generate -// Pd=cmp.ltu(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs) -def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt), - (i32 IntRegs:$Rs))), - 1, 0))>, - Requires<[HasV4T]>; - -// For the sequence -// zext( setlt ( Rs, Rt)) -// Generate -// Pd=cmp.lt(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs) -def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt), - (i32 IntRegs:$Rs))), - 1, 0))>, - Requires<[HasV4T]>; - -// For the sequence -// zext( setugt ( Rs, Rt)) -// Generate -// Pd=cmp.gtu(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 1, 0))>, - Requires<[HasV4T]>; - -// This pattern interefers with coremark performance, not implementing at this -// time. -// For the sequence -// zext( setgt ( Rs, Rt)) -// Generate -// Pd=cmp.gt(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 - -// For the sequence -// zext( setuge ( Rs, Rt)) -// Generate -// Pd=cmp.ltu(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs) -def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt), - (i32 IntRegs:$Rs))), - 0, 1))>, - Requires<[HasV4T]>; - -// For the sequence -// zext( setge ( Rs, Rt)) -// Generate -// Pd=cmp.lt(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs) -def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt), - (i32 IntRegs:$Rs))), - 0, 1))>, - Requires<[HasV4T]>; - -// For the sequence -// zext( setule ( Rs, Rt)) -// Generate -// Pd=cmp.gtu(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 0, 1))>, - Requires<[HasV4T]>; - -// For the sequence -// zext( setle ( Rs, Rt)) -// Generate -// Pd=cmp.gt(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 0, 1))>, - Requires<[HasV4T]>; - -// For the sequence // zext( setult ( and(Rs, 255), u8)) // Use the isdigit transformation below -// Generate code of the form 'mux_ii(cmpbgtu(Rdd, C-1),0,1)' +// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)' // for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;. // The isdigit transformation relies on two 'clever' aspects: // 1) The data type is unsigned which allows us to eliminate a zero test after @@ -2993,130 +3222,11 @@ def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), // The code is transformed upstream of llvm into // retval = (c-48) < 10 ? 
1 : 0; let AddedComplexity = 139 in -def : Pat <(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)), - u7StrictPosImmPred:$src2)))), - (i32 (C2_muxii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1), - (DEC_CONST_BYTE u7StrictPosImmPred:$src2))), - 0, 1))>, - Requires<[HasV4T]>; - -// Pd=cmpb.gtu(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", -InputType = "reg" in -def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmpb.gtu($src1, $src2)", - [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255), - (and (i32 IntRegs:$src2), 255)))]>, - Requires<[HasV4T]>, ImmRegRel; - -// Following instruction is not being extended as it results into the incorrect -// code for negative numbers. - -// Signed half compare(.eq) ri. -// Pd=cmph.eq(Rs,#s8) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPhEQri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, s8Imm:$src2), - "$dst = cmph.eq($src1, #$src2)", - [(set (i1 PredRegs:$dst), (seteq (and (i32 IntRegs:$src1), 65535), - s8ImmPred:$src2))]>, - Requires<[HasV4T]>; - -// Signed half compare(.eq) rr. -// Case 1: xor + and, then compare: -// r0=xor(r0,r1) -// r0=and(r0,#0xffff) -// p0=cmp.eq(r0,#0) -// Pd=cmph.eq(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmph.eq($src1, $src2)", - [(set (i1 PredRegs:$dst), (seteq (and (xor (i32 IntRegs:$src1), - (i32 IntRegs:$src2)), - 65535), 0))]>, - Requires<[HasV4T]>; - -// Signed half compare(.eq) rr. -// Case 2: shift left 16 bits then compare: -// r0=asl(r0,16) -// r1=asl(r1,16) -// p0=cmp.eq(r0,r1) -// Pd=cmph.eq(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmph.eq($src1, $src2)", - [(set (i1 PredRegs:$dst), - (seteq (shl (i32 IntRegs:$src1), (i32 16)), - (shl (i32 IntRegs:$src2), (i32 16))))]>, - Requires<[HasV4T]>; - -/* Incorrect Pattern -- immediate should be right shifted before being -used in the cmph.gt instruction. -// Signed half compare(.gt) ri. -// Pd=cmph.gt(Rs,#s8) - -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8, -isCompare = 1, validSubTargets = HasV4SubT in -def CMPhGTri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, s8Ext:$src2), - "$dst = cmph.gt($src1, #$src2)", - [(set (i1 PredRegs:$dst), - (setgt (shl (i32 IntRegs:$src1), (i32 16)), - s8ExtPred:$src2))]>, - Requires<[HasV4T]>; -*/ - -// Signed half compare(.gt) rr. -// Pd=cmph.gt(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmph.gt($src1, $src2)", - [(set (i1 PredRegs:$dst), - (setgt (shl (i32 IntRegs:$src1), (i32 16)), - (shl (i32 IntRegs:$src2), (i32 16))))]>, - Requires<[HasV4T]>; - -// Unsigned half compare rr (.gtu). -// Pd=cmph.gtu(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU", -InputType = "reg" in -def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmph.gtu($src1, $src2)", - [(set (i1 PredRegs:$dst), - (setugt (and (i32 IntRegs:$src1), 65535), - (and (i32 IntRegs:$src2), 65535)))]>, - Requires<[HasV4T]>, ImmRegRel; - -// Unsigned half compare ri (.gtu). 
-// Pd=cmph.gtu(Rs,#u7) -let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7, -isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU", -InputType = "imm" in -def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, u7Ext:$src2), - "$dst = cmph.gtu($src1, #$src2)", - [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535), - u7ExtPred:$src2))]>, - Requires<[HasV4T]>, ImmRegRel; - -let validSubTargets = HasV4SubT in -def NTSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = !tstbit($src1, $src2)", - [(set (i1 PredRegs:$dst), - (seteq (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>, - Requires<[HasV4T]>; - -let validSubTargets = HasV4SubT in -def NTSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - "$dst = !tstbit($src1, $src2)", - [(set (i1 PredRegs:$dst), - (seteq (and (shl 1, u5ImmPred:$src2), (i32 IntRegs:$src1)), 0))]>, - Requires<[HasV4T]>; +def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)), + u7StrictPosImmPred:$src2)))), + (C2_muxii (A4_cmpbgtui IntRegs:$src1, + (DEC_CONST_BYTE u7StrictPosImmPred:$src2)), + 0, 1)>; //===----------------------------------------------------------------------===// // XTYPE/PRED - @@ -3173,40 +3283,23 @@ multiclass LD_MISC_L4_RETURN<string mnemonic> { } let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0, - validSubTargets = HasV4SubT, isCodeGenOnly = 0 in + Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0 in defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel; // Restore registers and dealloc return function call. let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC] in { -let validSubTargets = HasV4SubT in - def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs), - (ins calltarget:$dst), - "jump $dst", - []>, - Requires<[HasV4T]>; + Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in { + def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">; } // Restore registers and dealloc frame before a tail call. -let isCall = 1, isBarrier = 1, - Defs = [R29, R30, R31, PC] in { -let validSubTargets = HasV4SubT in - def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs), - (ins calltarget:$dst), - "call $dst", - []>, - Requires<[HasV4T]>; +let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in { + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel; } // Save registers function call. 
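Before leaving the compare section, here is the isdigit pattern above in source-level terms, as a self-contained C++ sketch: upstream of LLVM both range checks collapse into one unsigned compare, and DEC_CONST_BYTE supplies the C-1 value that cmpb.gtu needs.

#include <cassert>

// Branchless isdigit, the shape handed to the selector:
//   ((c >= '0') & (c <= '9'))  ==>  (unsigned)(c - '0') < 10
// The pattern above turns the setult against 10 into
// C2_muxii(A4_cmpbgtui(Rs, #9), 0, 1), with 9 = 10 - 1 coming from
// DEC_CONST_BYTE.
int isDigitBranchless(unsigned char c) {
  return (unsigned)(c - '0') < 10u;
}

int main() {
  assert(isDigitBranchless('0') && isDigitBranchless('9'));
  assert(!isDigitBranchless('/'));  // '0'-1: subtraction wraps, not < 10
  assert(!isDigitBranchless('a'));
  return 0;
}
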
-let isCall = 1, isBarrier = 1, - Uses = [R29, R31] in { - def SAVE_REGISTERS_CALL_V4 : JInst<(outs), - (ins calltarget:$dst), - "call $dst // Save_calle_saved_registers", - []>, - Requires<[HasV4T]>; +let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { + def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel; } //===----------------------------------------------------------------------===// @@ -3278,7 +3371,7 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp, //===----------------------------------------------------------------------===// class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp, bits<2> MajOp, bit isHalf> - : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1, isHalf>, + : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1, isHalf>, AddrModeRel { string ImmOpStr = !cast<string>(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3295,7 +3388,7 @@ class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp, //===----------------------------------------------------------------------===// // Multiclass for store instructions with absolute addressing. //===----------------------------------------------------------------------===// -let validSubTargets = HasV4SubT, addrMode = Absolute, isExtended = 1 in +let addrMode = Absolute, isExtended = 1 in multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC, Operand ImmOp, bits<2> MajOp, bit isHalf = 0> { let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { @@ -3319,7 +3412,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC, let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1, isNewValue = 1, opNewValue = 1 in class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs> - : NVInst_V4<(outs), (ins u0AlwaysExt:$addr, IntRegs:$src), + : NVInst_V4<(outs), (ins u32Imm:$addr, IntRegs:$src), mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new", [], "", V2LDST_tc_st_SLOT0> { bits<19> addr; @@ -3397,7 +3490,7 @@ class T_StoreAbs_NV <string mnemonic, Operand ImmOp, bits<2> MajOp> //===----------------------------------------------------------------------===// // Multiclass for new-value store instructions with absolute addressing. 
//===----------------------------------------------------------------------===// -let validSubTargets = HasV4SubT, addrMode = Absolute, isExtended = 1 in +let addrMode = Absolute, isExtended = 1 in multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp, bits<2> MajOp> { let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { @@ -3417,22 +3510,22 @@ multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp, //===----------------------------------------------------------------------===// // Stores with absolute addressing //===----------------------------------------------------------------------===// -let accessSize = ByteAccess, isCodeGenOnly = 0 in +let accessSize = ByteAccess in defm storerb : ST_Abs <"memb", "STrib", IntRegs, u16_0Imm, 0b00>, ST_Abs_NV <"memb", "STrib", u16_0Imm, 0b00>; -let accessSize = HalfWordAccess, isCodeGenOnly = 0 in +let accessSize = HalfWordAccess in defm storerh : ST_Abs <"memh", "STrih", IntRegs, u16_1Imm, 0b01>, ST_Abs_NV <"memh", "STrih", u16_1Imm, 0b01>; -let accessSize = WordAccess, isCodeGenOnly = 0 in +let accessSize = WordAccess in defm storeri : ST_Abs <"memw", "STriw", IntRegs, u16_2Imm, 0b10>, ST_Abs_NV <"memw", "STriw", u16_2Imm, 0b10>; -let isNVStorable = 0, accessSize = DoubleWordAccess, isCodeGenOnly = 0 in +let isNVStorable = 0, accessSize = DoubleWordAccess in defm storerd : ST_Abs <"memd", "STrid", DoubleRegs, u16_3Imm, 0b11>; -let isNVStorable = 0, accessSize = HalfWordAccess, isCodeGenOnly = 0 in +let isNVStorable = 0, accessSize = HalfWordAccess in defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>; //===----------------------------------------------------------------------===// @@ -3442,7 +3535,7 @@ defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>; // if ([!]Pv[.new]) mem[bhwd](##global)=Rt //===----------------------------------------------------------------------===// -let validSubTargets = HasV4SubT in +let isAsmParserOnly = 1 in class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp, bits<2> MajOp, bit isHalf = 0> : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0, isHalf> { @@ -3452,7 +3545,7 @@ class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC, let BaseOpcode = BaseOp#_abs; } -let validSubTargets = HasV4SubT in +let isAsmParserOnly = 1 in multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp, bits<2> MajOp, bit isHalf = 0> { // Set BaseOpcode same as absolute addressing instructions so that @@ -3483,77 +3576,44 @@ let isNVStorable = 0, accessSize = HalfWordAccess in def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>, PredNewRel; -let Predicates = [HasV4T], AddedComplexity = 30 in { -def : Pat<(truncstorei8 (i32 IntRegs:$src1), - (HexagonCONST32 tglobaladdr:$absaddr)), - (S2_storerbabs tglobaladdr: $absaddr, IntRegs: $src1)>; - -def : Pat<(truncstorei16 (i32 IntRegs:$src1), - (HexagonCONST32 tglobaladdr:$absaddr)), - (S2_storerhabs tglobaladdr: $absaddr, IntRegs: $src1)>; - -def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)), - (S2_storeriabs tglobaladdr: $absaddr, IntRegs: $src1)>; - -def : Pat<(store (i64 DoubleRegs:$src1), - (HexagonCONST32 tglobaladdr:$absaddr)), - (S2_storerdabs tglobaladdr: $absaddr, DoubleRegs: $src1)>; +class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI> + : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; + +class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod, + InstHexagon MI> + : Pat<(VT (Load 
Addr:$addr)), (ValueMod (MI Addr:$addr))>; + +class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI> + : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; + +class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$val, Addr:$addr), + (MI Addr:$addr, (ValueMod Value:$val))>; + +def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>; +def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>; +def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>; +def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>; + +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>; + def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>; + def: Storea_pat<store, I32, addrgp, S2_storerigp>; + def: Storea_pat<store, I64, addrgp, S2_storerdgp>; + + // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" + // to "r0 = 1; memw(#foo) = r0" + let AddedComplexity = 100 in + def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), + (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>; } -// 64 bit atomic store -def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global), - (i64 DoubleRegs:$src1)), - (S2_storerdgp tglobaladdr:$global, (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress) -> memd(#foo) -let AddedComplexity = 100 in -def : Pat <(store (i64 DoubleRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (S2_storerdgp tglobaladdr:$global, (i64 DoubleRegs:$src1))>; - -// 8 bit atomic store -def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (S2_storerbgp tglobaladdr:$global, (i32 IntRegs:$src1))>; - -// Map from store(globaladdress) -> memb(#foo) -let AddedComplexity = 100 in -def : Pat<(truncstorei8 (i32 IntRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (S2_storerbgp tglobaladdr:$global, (i32 IntRegs:$src1))>; - -// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" -// to "r0 = 1; memw(#foo) = r0" -let AddedComplexity = 100 in -def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), - (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>; - -def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (S2_storerhgp tglobaladdr:$global, (i32 IntRegs:$src1))>; - -// Map from store(globaladdress) -> memh(#foo) -let AddedComplexity = 100 in -def : Pat<(truncstorei16 (i32 IntRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (S2_storerhgp tglobaladdr:$global, (i32 IntRegs:$src1))>; - -// 32 bit atomic store -def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (S2_storerigp tglobaladdr:$global, (i32 IntRegs:$src1))>; - -// Map from store(globaladdress) -> memw(#foo) -let AddedComplexity = 100 in -def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), - (S2_storerigp tglobaladdr:$global, (i32 IntRegs:$src1))>; - //===----------------------------------------------------------------------===// // Template class for non predicated load instructions with // absolute addressing mode. 
//===----------------------------------------------------------------------===// -let isPredicable = 1, hasSideEffects = 0, validSubTargets = HasV4SubT in +let isPredicable = 1, hasSideEffects = 0 in class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, bits<3> MajOp, Operand AddrOp, bit isAbs> : LDInst <(outs RC:$dst), (ins AddrOp:$addr), @@ -3582,7 +3642,7 @@ class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp, bits<3> MajOp> - : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1>, AddrModeRel { + : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1>, AddrModeRel { string ImmOpStr = !cast<string>(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3595,11 +3655,12 @@ class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp, !if (!eq(ImmOpStr, "u16_1Imm"), 1, /* u16_0Imm */ 0))); } + //===----------------------------------------------------------------------===// // Template class for predicated load instructions with // absolute addressing mode. //===----------------------------------------------------------------------===// -let isPredicated = 1, hasNewValue = 1, opExtentBits = 6, opExtendable = 2 in +let isPredicated = 1, opExtentBits = 6, opExtendable = 2 in class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isPredNot, bit isPredNew> : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u6Ext:$absaddr), @@ -3611,6 +3672,7 @@ class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp, let isPredicatedNew = isPredNew; let isPredicatedFalse = isPredNot; + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); let IClass = 0b1001; @@ -3649,20 +3711,20 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC, } } -let accessSize = ByteAccess, hasNewValue = 1, isCodeGenOnly = 0 in { +let accessSize = ByteAccess, hasNewValue = 1 in { defm loadrb : LD_Abs<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>; defm loadrub : LD_Abs<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>; } -let accessSize = HalfWordAccess, hasNewValue = 1, isCodeGenOnly = 0 in { +let accessSize = HalfWordAccess, hasNewValue = 1 in { defm loadrh : LD_Abs<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>; defm loadruh : LD_Abs<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>; } -let accessSize = WordAccess, hasNewValue = 1, isCodeGenOnly = 0 in +let accessSize = WordAccess, hasNewValue = 1 in defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; -let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in +let accessSize = DoubleWordAccess in defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; //===----------------------------------------------------------------------===// @@ -3672,6 +3734,7 @@ defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; // if ([!]Pv[.new]) Rx=mem[bhwd](##global) //===----------------------------------------------------------------------===// +let isAsmParserOnly = 1 in class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp, bits<3> MajOp> : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0>, PredNewRel { @@ -3694,439 +3757,175 @@ def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; let accessSize = DoubleWordAccess in def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; -let Predicates = [HasV4T], AddedComplexity = 30 in { -def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))), - (L4_loadri_abs tglobaladdr: $absaddr)>; - 
-def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))), - (L4_loadrb_abs tglobaladdr:$absaddr)>; - -def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))), - (L4_loadrub_abs tglobaladdr:$absaddr)>; - -def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), - (L4_loadrh_abs tglobaladdr:$absaddr)>; - -def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), - (L4_loadruh_abs tglobaladdr:$absaddr)>; -} - -def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), - (i64 (L2_loadrdgp tglobaladdr:$global))>; - -def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (L2_loadrigp tglobaladdr:$global))>; - -def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (L2_loadruhgp tglobaladdr:$global))>; - -def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (L2_loadrubgp tglobaladdr:$global))>; - -// Map from load(globaladdress) -> memw(#foo + 0) -let AddedComplexity = 100 in -def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i64 (L2_loadrdgp tglobaladdr:$global))>; +def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>; +def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>; +def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>; +def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>; // Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd -let AddedComplexity = 100 in -def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>; +def: Loadam_pat<load, i1, addrga, I32toI1, L4_loadrub_abs>; +def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>; + +def: Stoream_pat<store, I1, addrga, I1toI32, S2_storerbabs>; +def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>; + +// Map from load(globaladdress) -> mem[u][bhwd](#foo) +class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> + : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))), + (VT (MI tglobaladdr:$global))>; + +let AddedComplexity = 100 in { + def: LoadGP_pats <extloadi8, L2_loadrbgp>; + def: LoadGP_pats <sextloadi8, L2_loadrbgp>; + def: LoadGP_pats <zextloadi8, L2_loadrubgp>; + def: LoadGP_pats <extloadi16, L2_loadrhgp>; + def: LoadGP_pats <sextloadi16, L2_loadrhgp>; + def: LoadGP_pats <zextloadi16, L2_loadruhgp>; + def: LoadGP_pats <load, L2_loadrigp>; + def: LoadGP_pats <load, L2_loadrdgp, i64>; +} // When the Interprocedural Global Variable optimizer realizes that a certain // global variable takes only two constant values, it shrinks the global to // a boolean. Catch those loads here in the following 3 patterns. 
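As a rough source-level picture of that shrink (variable name and values invented for illustration):

#include <cassert>

// "gMode" only ever holds the two constants {0, 4}, so the optimizer
// can store it as a boolean byte; its reads then surface in the DAG
// as extloadi1/zextloadi1, which the patterns directly below catch.
static bool gModeIsFast = false;   // shrunk from: static int gMode;

int modeValue() { return gModeIsFast ? 4 : 0; }

int main() {
  assert(modeValue() == 0);
  gModeIsFast = true;
  assert(modeValue() == 4);
  return 0;
}
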
-let AddedComplexity = 100 in -def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (L2_loadrbgp tglobaladdr:$global))>; +let AddedComplexity = 100 in { + def: LoadGP_pats <extloadi1, L2_loadrubgp>; + def: LoadGP_pats <zextloadi1, L2_loadrubgp>; +} -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (L2_loadrbgp tglobaladdr:$global))>; +// Transfer global address into a register +def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>; +def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi s16Ext:$Rs)>; +def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>; + +let AddedComplexity = 30 in { + def: Storea_pat<truncstorei8, I32, u32ImmPred, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, u32ImmPred, S2_storerhabs>; + def: Storea_pat<store, I32, u32ImmPred, S2_storeriabs>; +} -// Map from load(globaladdress) -> memb(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (L2_loadrbgp tglobaladdr:$global))>; +let AddedComplexity = 30 in { + def: Loada_pat<load, i32, u32ImmPred, L4_loadri_abs>; + def: Loada_pat<sextloadi8, i32, u32ImmPred, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, u32ImmPred, L4_loadrub_abs>; + def: Loada_pat<sextloadi16, i32, u32ImmPred, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, u32ImmPred, L4_loadruh_abs>; +} -// Map from load(globaladdress) -> memb(#foo) +// Indexed store word - global address. +// memw(Rs+#u6:2)=#S8 let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (L2_loadrbgp tglobaladdr:$global))>; +def: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>; -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (L2_loadrubgp tglobaladdr:$global))>; +// Load from a global address that has only one use in the current basic block. +let AddedComplexity = 100 in { + def: Loada_pat<extloadi8, i32, addrga, L4_loadrub_abs>; + def: Loada_pat<sextloadi8, i32, addrga, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, addrga, L4_loadrub_abs>; -// Map from load(globaladdress) -> memub(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (L2_loadrubgp tglobaladdr:$global))>; + def: Loada_pat<extloadi16, i32, addrga, L4_loadruh_abs>; + def: Loada_pat<sextloadi16, i32, addrga, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, addrga, L4_loadruh_abs>; -// Map from load(globaladdress) -> memh(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (L2_loadrhgp tglobaladdr:$global))>; + def: Loada_pat<load, i32, addrga, L4_loadri_abs>; + def: Loada_pat<load, i64, addrga, L4_loadrd_abs>; +} -// Map from load(globaladdress) -> memh(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (L2_loadrhgp tglobaladdr:$global))>; +// Store to a global address that has only one use in the current basic block. 
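The single-use load patterns above, and the store patterns just below, fold the global's address straight into one absolute-addressing memory instruction; roughly, for C++ like this (identifier invented):

// With the addrga patterns, each access can select to an absolute
// form such as "r0 = memw(##counter)" / "memw(##counter) = r0",
// instead of a separate "r1 = ##counter" transfer plus memw(r1+#0).
int counter;
void bump() { counter = counter + 1; }

int main() { bump(); return counter == 1 ? 0 : 1; }
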
+let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrga, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, addrga, S2_storerhabs>; + def: Storea_pat<store, I32, addrga, S2_storeriabs>; + def: Storea_pat<store, I64, addrga, S2_storerdabs>; -// Map from load(globaladdress) -> memuh(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (L2_loadruhgp tglobaladdr:$global))>; + def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>; +} -// Map from load(globaladdress) -> memw(#foo) +// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd let AddedComplexity = 100 in -def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (L2_loadrigp tglobaladdr:$global))>; - +def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>; // Transfer global address into a register let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1, -isAsCheapAsAMove = 1, isReMaterializable = 1, validSubTargets = HasV4SubT in +isAsCheapAsAMove = 1, isReMaterializable = 1, isCodeGenOnly = 1 in def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), "$dst = #$src1", - [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>, - Requires<[HasV4T]>; + [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>; // Transfer a block address into a register def : Pat<(HexagonCONST32_GP tblockaddress:$src1), - (TFRI_V4 tblockaddress:$src1)>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 2, AddedComplexity=50, -hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in -def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, s16Ext:$src2), - "if($src1) $dst = #$src2", - []>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 2, AddedComplexity=50, isPredicatedFalse = 1, -hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in -def TFRI_cNotPt_V4 : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, s16Ext:$src2), - "if(!$src1) $dst = #$src2", - []>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 2, AddedComplexity=50, -hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in -def TFRI_cdnPt_V4 : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, s16Ext:$src2), - "if($src1.new) $dst = #$src2", - []>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 2, AddedComplexity=50, isPredicatedFalse = 1, -hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in -def TFRI_cdnNotPt_V4 : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, s16Ext:$src2), - "if(!$src1.new) $dst = #$src2", - []>, - Requires<[HasV4T]>; - -let AddedComplexity = 50, Predicates = [HasV4T] in -def : Pat<(HexagonCONST32_GP tglobaladdr:$src1), - (TFRI_V4 tglobaladdr:$src1)>, - Requires<[HasV4T]>; - - -// Load - Indirect with long offset: These instructions take global address -// as an operand -let isExtended = 1, opExtendable = 3, AddedComplexity = 40, -validSubTargets = HasV4SubT in -def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst), - (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset), - "$dst=memd($src1<<#$src2+##$offset)", - [(set (i64 DoubleRegs:$dst), - (load (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$offset))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 40 in -multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> { -let isExtended = 1, opExtendable = 3, validSubTargets = HasV4SubT in - def 
_lo_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset), - !strconcat("$dst = ", - !strconcat(OpcStr, "($src1<<#$src2+##$offset)")), - [(set IntRegs:$dst, - (i32 (OpNode (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$offset)))))]>, - Requires<[HasV4T]>; -} - -defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>; -defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>; -defm LDriub_ind_anyext : LD_indirect_lo<"memub", extloadi8>; -defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>; -defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>; -defm LDriuh_ind_anyext : LD_indirect_lo<"memuh", extloadi16>; -defm LDriw_ind : LD_indirect_lo<"memw", load>; - -let AddedComplexity = 40 in -def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, - (NumUsesBelowThresCONST32 tglobaladdr:$offset)))), - (i32 (LDrib_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>, - Requires<[HasV4T]>; - -let AddedComplexity = 40 in -def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, - (NumUsesBelowThresCONST32 tglobaladdr:$offset)))), - (i32 (LDriub_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>, - Requires<[HasV4T]>; - -let Predicates = [HasV4T], AddedComplexity = 30 in { -def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), - (S2_storerbabs u0AlwaysExtPred:$src2, IntRegs: $src1)>; - -def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), - (S2_storerhabs u0AlwaysExtPred:$src2, IntRegs: $src1)>; + (TFRI_V4 tblockaddress:$src1)>; -def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), - (S2_storeriabs u0AlwaysExtPred:$src2, IntRegs: $src1)>; -} - -let Predicates = [HasV4T], AddedComplexity = 30 in { -def : Pat<(i32 (load u0AlwaysExtPred:$src)), - (L4_loadri_abs u0AlwaysExtPred:$src)>; - -def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)), - (L4_loadrb_abs u0AlwaysExtPred:$src)>; - -def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)), - (L4_loadrub_abs u0AlwaysExtPred:$src)>; - -def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)), - (L4_loadrh_abs u0AlwaysExtPred:$src)>; - -def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)), - (L4_loadruh_abs u0AlwaysExtPred:$src)>; -} - -// Indexed store word - global address. -// memw(Rs+#u6:2)=#S8 -let AddedComplexity = 10 in -def STriw_offset_ext_V4 : STInst<(outs), - (ins IntRegs:$src1, u6_2Imm:$src2, globaladdress:$src3), - "memw($src1+#$src2) = ##$src3", - [(store (HexagonCONST32 tglobaladdr:$src3), - (add IntRegs:$src1, u6_2ImmPred:$src2))]>, - Requires<[HasV4T]>; - -def : Pat<(i64 (ctlz (i64 DoubleRegs:$src1))), - (i64 (A4_combineir (i32 0), (i32 (CTLZ64_rr DoubleRegs:$src1))))>, - Requires<[HasV4T]>; - -def : Pat<(i64 (cttz (i64 DoubleRegs:$src1))), - (i64 (A4_combineir (i32 0), (i32 (CTTZ64_rr DoubleRegs:$src1))))>, - Requires<[HasV4T]>; - - -// i8 -> i64 loads -// We need a complexity of 120 here to override preceding handling of -// zextloadi8. 
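// (Background note: the generated matcher prefers patterns with higher
// complexity, so the AddedComplexity = 120 used for these widening i64 loads
// outranks the AddedComplexity = 100 byte/halfword load patterns earlier in
// this file, both before and after this change.)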
-let Predicates = [HasV4T], AddedComplexity = 120 in { -def: Pat <(i64 (extloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (A4_combineir 0, (L4_loadrb_abs tglobaladdr:$addr)))>; - -def: Pat <(i64 (zextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (A4_combineir 0, (L4_loadrub_abs tglobaladdr:$addr)))>; - -def: Pat <(i64 (sextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (A2_sxtw (L4_loadrb_abs tglobaladdr:$addr)))>; - -def: Pat <(i64 (extloadi8 FoldGlobalAddr:$addr)), - (i64 (A4_combineir 0, (L4_loadrb_abs FoldGlobalAddr:$addr)))>; - -def: Pat <(i64 (zextloadi8 FoldGlobalAddr:$addr)), - (i64 (A4_combineir 0, (L4_loadrub_abs FoldGlobalAddr:$addr)))>; +let AddedComplexity = 50 in +def : Pat<(HexagonCONST32_GP tglobaladdr:$src1), + (TFRI_V4 tglobaladdr:$src1)>; -def: Pat <(i64 (sextloadi8 FoldGlobalAddr:$addr)), - (i64 (A2_sxtw (L4_loadrb_abs FoldGlobalAddr:$addr)))>; -} -// i16 -> i64 loads +// i8/i16/i32 -> i64 loads // We need a complexity of 120 here to override preceding handling of -// zextloadi16. +// zextload. let AddedComplexity = 120 in { -def: Pat <(i64 (extloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (A4_combineir 0, (L4_loadrh_abs tglobaladdr:$addr)))>, - Requires<[HasV4T]>; - -def: Pat <(i64 (zextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (A4_combineir 0, (L4_loadruh_abs tglobaladdr:$addr)))>, - Requires<[HasV4T]>; + def: Loadam_pat<extloadi8, i64, addrga, Zext64, L4_loadrub_abs>; + def: Loadam_pat<sextloadi8, i64, addrga, Sext64, L4_loadrb_abs>; + def: Loadam_pat<zextloadi8, i64, addrga, Zext64, L4_loadrub_abs>; -def: Pat <(i64 (sextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (A2_sxtw (L4_loadrh_abs tglobaladdr:$addr)))>, - Requires<[HasV4T]>; + def: Loadam_pat<extloadi16, i64, addrga, Zext64, L4_loadruh_abs>; + def: Loadam_pat<sextloadi16, i64, addrga, Sext64, L4_loadrh_abs>; + def: Loadam_pat<zextloadi16, i64, addrga, Zext64, L4_loadruh_abs>; -def: Pat <(i64 (extloadi16 FoldGlobalAddr:$addr)), - (i64 (A4_combineir 0, (L4_loadrh_abs FoldGlobalAddr:$addr)))>, - Requires<[HasV4T]>; - -def: Pat <(i64 (zextloadi16 FoldGlobalAddr:$addr)), - (i64 (A4_combineir 0, (L4_loadruh_abs FoldGlobalAddr:$addr)))>, - Requires<[HasV4T]>; - -def: Pat <(i64 (sextloadi16 FoldGlobalAddr:$addr)), - (i64 (A2_sxtw (L4_loadrh_abs FoldGlobalAddr:$addr)))>, - Requires<[HasV4T]>; + def: Loadam_pat<extloadi32, i64, addrga, Zext64, L4_loadri_abs>; + def: Loadam_pat<sextloadi32, i64, addrga, Sext64, L4_loadri_abs>; + def: Loadam_pat<zextloadi32, i64, addrga, Zext64, L4_loadri_abs>; } -// i32->i64 loads -// We need a complexity of 120 here to override preceding handling of -// zextloadi32. 
-let AddedComplexity = 120 in { -def: Pat <(i64 (extloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (A4_combineir 0, (L4_loadri_abs tglobaladdr:$addr)))>, - Requires<[HasV4T]>; -def: Pat <(i64 (zextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (A4_combineir 0, (L4_loadri_abs tglobaladdr:$addr)))>, - Requires<[HasV4T]>; +let AddedComplexity = 100 in { + def: Loada_pat<extloadi8, i32, addrgp, L4_loadrub_abs>; + def: Loada_pat<sextloadi8, i32, addrgp, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, addrgp, L4_loadrub_abs>; -def: Pat <(i64 (sextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (A2_sxtw (L4_loadri_abs tglobaladdr:$addr)))>, - Requires<[HasV4T]>; + def: Loada_pat<extloadi16, i32, addrgp, L4_loadruh_abs>; + def: Loada_pat<sextloadi16, i32, addrgp, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, addrgp, L4_loadruh_abs>; -def: Pat <(i64 (extloadi32 FoldGlobalAddr:$addr)), - (i64 (A4_combineir 0, (L4_loadri_abs FoldGlobalAddr:$addr)))>, - Requires<[HasV4T]>; - -def: Pat <(i64 (zextloadi32 FoldGlobalAddr:$addr)), - (i64 (A4_combineir 0, (L4_loadri_abs FoldGlobalAddr:$addr)))>, - Requires<[HasV4T]>; - -def: Pat <(i64 (sextloadi32 FoldGlobalAddr:$addr)), - (i64 (A2_sxtw (L4_loadri_abs FoldGlobalAddr:$addr)))>, - Requires<[HasV4T]>; + def: Loada_pat<load, i32, addrgp, L4_loadri_abs>; + def: Loada_pat<load, i64, addrgp, L4_loadrd_abs>; } -// Indexed store double word - global address. -// memw(Rs+#u6:2)=#S8 -let AddedComplexity = 10 in -def STrih_offset_ext_V4 : STInst<(outs), - (ins IntRegs:$src1, u6_1Imm:$src2, globaladdress:$src3), - "memh($src1+#$src2) = ##$src3", - [(truncstorei16 (HexagonCONST32 tglobaladdr:$src3), - (add IntRegs:$src1, u6_1ImmPred:$src2))]>, - Requires<[HasV4T]>; -// Map from store(globaladdress + x) -> memd(#foo + x) -let AddedComplexity = 100 in -def : Pat<(store (i64 DoubleRegs:$src1), - FoldGlobalAddrGP:$addr), - (S2_storerdabs FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_64 FoldGlobalAddrGP:$addr, - (i64 DoubleRegs:$src1)), - (S2_storerdabs FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memb(#foo + x) -let AddedComplexity = 100 in -def : Pat<(truncstorei8 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr), - (S2_storerbabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_8 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)), - (S2_storerbabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memh(#foo + x) -let AddedComplexity = 100 in -def : Pat<(truncstorei16 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr), - (S2_storerhabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_16 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)), - (S2_storerhabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memw(#foo + x) -let AddedComplexity = 100 in -def : Pat<(store (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr), - (S2_storeriabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_32 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)), - (S2_storeriabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memd(#foo + x) -let AddedComplexity = 100 in -def : Pat<(i64 (load FoldGlobalAddrGP:$addr)), - (i64 (L4_loadrd_abs 
FoldGlobalAddrGP:$addr))>,
-          Requires<[HasV4T]>;
-
-def : Pat<(atomic_load_64 FoldGlobalAddrGP:$addr),
-          (i64 (L4_loadrd_abs FoldGlobalAddrGP:$addr))>,
-          Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (extloadi8 FoldGlobalAddrGP:$addr)),
-          (i32 (L4_loadrb_abs FoldGlobalAddrGP:$addr))>,
-          Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (sextloadi8 FoldGlobalAddrGP:$addr)),
-          (i32 (L4_loadrb_abs FoldGlobalAddrGP:$addr))>,
-          Requires<[HasV4T]>;
-
-//let AddedComplexity = 100 in
-let AddedComplexity = 100 in
-def : Pat<(i32 (extloadi16 FoldGlobalAddrGP:$addr)),
-          (i32 (L4_loadrh_abs FoldGlobalAddrGP:$addr))>,
-          Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (sextloadi16 FoldGlobalAddrGP:$addr)),
-          (i32 (L4_loadrh_abs FoldGlobalAddrGP:$addr))>,
-          Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memuh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (zextloadi16 FoldGlobalAddrGP:$addr)),
-          (i32 (L4_loadruh_abs FoldGlobalAddrGP:$addr))>,
-          Requires<[HasV4T]>;
-
-def : Pat<(atomic_load_16 FoldGlobalAddrGP:$addr),
-          (i32 (L4_loadruh_abs FoldGlobalAddrGP:$addr))>,
-          Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memub(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (zextloadi8 FoldGlobalAddrGP:$addr)),
-          (i32 (L4_loadrub_abs FoldGlobalAddrGP:$addr))>,
-          Requires<[HasV4T]>;
-
-def : Pat<(atomic_load_8 FoldGlobalAddrGP:$addr),
-          (i32 (L4_loadrub_abs FoldGlobalAddrGP:$addr))>,
-          Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memw(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (load FoldGlobalAddrGP:$addr)),
-          (i32 (L4_loadri_abs FoldGlobalAddrGP:$addr))>,
-          Requires<[HasV4T]>;
+let AddedComplexity = 100 in {
+  def: Storea_pat<truncstorei8,  I32, addrgp, S2_storerbabs>;
+  def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhabs>;
+  def: Storea_pat<store,         I32, addrgp, S2_storeriabs>;
+  def: Storea_pat<store,         I64, addrgp, S2_storerdabs>;
+}
 
-def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr),
-          (i32 (L4_loadri_abs FoldGlobalAddrGP:$addr))>,
-          Requires<[HasV4T]>;
+def: Loada_pat<atomic_load_8,  i32, addrgp, L4_loadrub_abs>;
+def: Loada_pat<atomic_load_16, i32, addrgp, L4_loadruh_abs>;
+def: Loada_pat<atomic_load_32, i32, addrgp, L4_loadri_abs>;
+def: Loada_pat<atomic_load_64, i64, addrgp, L4_loadrd_abs>;
+
+def: Storea_pat<SwapSt<atomic_store_8>,  I32, addrgp, S2_storerbabs>;
+def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>;
+def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>;
+def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>;
+
+let Constraints = "@earlyclobber $dst" in
+def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b,
+                      IntRegs:$c, IntRegs:$d),
+  ".error \"Should never try to emit Insert4\"",
+  [(set (i64 DoubleRegs:$dst),
+        (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))),
+                         (i32 16)),
+                    (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))),
+                (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))),
+                     (i32 32))),
+            (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>;
 
 //===----------------------------------------------------------------------===//
 // :raw form of boundscheck:hi:lo insns
 //===----------------------------------------------------------------------===//
 
 // A4_boundscheck_lo: Detect if a
register is within bounds. -let hasSideEffects = 0, isCodeGenOnly = 0 in +let hasSideEffects = 0 in def A4_boundscheck_lo: ALU64Inst < (outs PredRegs:$Pd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), @@ -4146,7 +3945,7 @@ def A4_boundscheck_lo: ALU64Inst < } // A4_boundscheck_hi: Detect if a register is within bounds. -let hasSideEffects = 0, isCodeGenOnly = 0 in +let hasSideEffects = 0 in def A4_boundscheck_hi: ALU64Inst < (outs PredRegs:$Pd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), @@ -4165,13 +3964,13 @@ def A4_boundscheck_hi: ALU64Inst < let Inst{12-8} = Rtt; } -let hasSideEffects = 0 in +let hasSideEffects = 0, isAsmParserOnly = 1 in def A4_boundscheck : MInst < (outs PredRegs:$Pd), (ins IntRegs:$Rs, DoubleRegs:$Rtt), "$Pd=boundscheck($Rs,$Rtt)">; // A4_tlbmatch: Detect if a VA/ASID matches a TLB entry. -let isPredicateLate = 1, hasSideEffects = 0, isCodeGenOnly = 0 in +let isPredicateLate = 1, hasSideEffects = 0 in def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rs, IntRegs:$Rt), "$Pd = tlbmatch($Rs, $Rt)", @@ -4198,7 +3997,7 @@ def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, // Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't // really do a load. -let hasSideEffects = 1, mayLoad = 0, isCodeGenOnly = 0 in +let hasSideEffects = 1, mayLoad = 0 in def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3), "dcfetch($Rs + #$u11_3)", [(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3)], @@ -4220,12 +4019,12 @@ def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3), let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 1, - isTerminator = 1, validSubTargets = HasV4SubT in + isTerminator = 1 in class CJInst_tstbit_R0<string px, bit np, string tnt> : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), ""#px#" = tstbit($Rs, #0); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND> { + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<11> r9_2; @@ -4248,14 +4047,14 @@ class CJInst_tstbit_R0<string px, bit np, string tnt> let Inst{7-1} = r9_2{8-2}; } -let Defs = [PC, P0], Uses = [P0], isCodeGenOnly = 0 in { +let Defs = [PC, P0], Uses = [P0] in { def J4_tstbit0_tp0_jump_nt : CJInst_tstbit_R0<"p0", 0, "nt">; def J4_tstbit0_tp0_jump_t : CJInst_tstbit_R0<"p0", 0, "t">; def J4_tstbit0_fp0_jump_nt : CJInst_tstbit_R0<"p0", 1, "nt">; def J4_tstbit0_fp0_jump_t : CJInst_tstbit_R0<"p0", 1, "t">; } -let Defs = [PC, P1], Uses = [P1], isCodeGenOnly = 0 in { +let Defs = [PC, P1], Uses = [P1] in { def J4_tstbit0_tp1_jump_nt : CJInst_tstbit_R0<"p1", 0, "nt">; def J4_tstbit0_tp1_jump_t : CJInst_tstbit_R0<"p1", 0, "t">; def J4_tstbit0_fp1_jump_nt : CJInst_tstbit_R0<"p1", 1, "nt">; @@ -4266,12 +4065,12 @@ let Defs = [PC, P1], Uses = [P1], isCodeGenOnly = 0 in { let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, - opExtendable = 2, isTerminator = 1, validSubTargets = HasV4SubT in + opExtendable = 2, isTerminator = 1 in class CJInst_RR<string px, string op, bit np, string tnt> : InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs, $Rt); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND> { + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<4> Rt; bits<11> r9_2; @@ -4314,21 
+4113,18 @@ multiclass T_pnp_CJInst_RR<string op>{ defm J4_cmp#NAME#_f : T_tnt_CJInst_RR<op, 1>; } // TypeCJ Instructions compare RR and jump -let isCodeGenOnly = 0 in { defm eq : T_pnp_CJInst_RR<"eq">; defm gt : T_pnp_CJInst_RR<"gt">; defm gtu : T_pnp_CJInst_RR<"gtu">; -} let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11, - opExtentAlign = 2, opExtendable = 2, isTerminator = 1, - validSubTargets = HasV4SubT in + opExtentAlign = 2, opExtendable = 2, isTerminator = 1 in class CJInst_RU5<string px, string op, bit np, string tnt> : InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs, #$U5); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND> { + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<5> U5; bits<11> r9_2; @@ -4371,21 +4167,19 @@ multiclass T_pnp_CJInst_RU5<string op>{ defm J4_cmp#NAME#i_f : T_tnt_CJInst_RU5<op, 1>; } // TypeCJ Instructions compare RI and jump -let isCodeGenOnly = 0 in { defm eq : T_pnp_CJInst_RU5<"eq">; defm gt : T_pnp_CJInst_RU5<"gt">; defm gtu : T_pnp_CJInst_RU5<"gtu">; -} let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 1, - isTerminator = 1, validSubTargets = HasV4SubT in + isTerminator = 1 in class CJInst_Rn1<string px, string op, bit np, string tnt> : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs,#-1); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND> { + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<11> r9_2; @@ -4427,16 +4221,13 @@ multiclass T_pnp_CJInst_Rn1<string op>{ defm J4_cmp#NAME#n1_f : T_tnt_CJInst_Rn1<op, 1>; } // TypeCJ Instructions compare -1 and jump -let isCodeGenOnly = 0 in { defm eq : T_pnp_CJInst_Rn1<"eq">; defm gt : T_pnp_CJInst_Rn1<"gt">; -} // J4_jumpseti: Direct unconditional jump and set register to immediate. let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1, isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11, - opExtentAlign = 2, opExtendable = 2, validSubTargets = HasV4SubT, - isCodeGenOnly = 0 in + opExtentAlign = 2, opExtendable = 2 in def J4_jumpseti: CJInst < (outs IntRegs:$Rd), (ins u6Imm:$U6, brtarget:$r9_2), @@ -4456,8 +4247,7 @@ def J4_jumpseti: CJInst < // J4_jumpsetr: Direct unconditional jump and transfer register. 
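// (Informal sketch of the jumpset compounds, assumed syntax:
//   r2 = #7 ; jump target      -- J4_jumpseti
//   r2 = r5 ; jump target      -- J4_jumpsetr
// a transfer and a direct branch issued as a single compound, with the
// branch target subject to the usual constant-extender rules.)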
let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1, isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11, - opExtentAlign = 2, opExtendable = 2, validSubTargets = HasV4SubT, - isCodeGenOnly = 0 in + opExtentAlign = 2, opExtendable = 2 in def J4_jumpsetr: CJInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, brtarget:$r9_2), diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td index 5674aa3ccd83..337f4ea2184a 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV5.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV5.td @@ -15,7 +15,34 @@ // XTYPE/MPY //===----------------------------------------------------------------------===// -let isCodeGenOnly = 0 in + //Rdd[+]=vrmpybsu(Rss,Rtt) +let Predicates = [HasV5T] in { + def M5_vrmpybsu: T_XTYPE_Vect<"vrmpybsu", 0b110, 0b001, 0>; + def M5_vrmacbsu: T_XTYPE_Vect_acc<"vrmpybsu", 0b110, 0b001, 0>; + + //Rdd[+]=vrmpybu(Rss,Rtt) + def M5_vrmpybuu: T_XTYPE_Vect<"vrmpybu", 0b100, 0b001, 0>; + def M5_vrmacbuu: T_XTYPE_Vect_acc<"vrmpybu", 0b100, 0b001, 0>; + + def M5_vdmpybsu: T_M2_vmpy<"vdmpybsu", 0b101, 0b001, 0, 0, 1>; + def M5_vdmacbsu: T_M2_vmpy_acc_sat <"vdmpybsu", 0b001, 0b001, 0, 0>; +} + +// Vector multiply bytes +// Rdd=vmpyb[s]u(Rs,Rt) +let Predicates = [HasV5T] in { + def M5_vmpybsu: T_XTYPE_mpy64 <"vmpybsu", 0b010, 0b001, 0, 0, 0>; + def M5_vmpybuu: T_XTYPE_mpy64 <"vmpybu", 0b100, 0b001, 0, 0, 0>; + + // Rxx+=vmpyb[s]u(Rs,Rt) + def M5_vmacbsu: T_XTYPE_mpy64_acc <"vmpybsu", "+", 0b110, 0b001, 0, 0, 0>; + def M5_vmacbuu: T_XTYPE_mpy64_acc <"vmpybu", "+", 0b100, 0b001, 0, 0, 0>; + + // Rd=vaddhub(Rss,Rtt):sat + let hasNewValue = 1, opNewValue = 0 in + def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>; +} + def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm, [(set I64:$dst, (sra (i64 (add (i64 (sra I64:$src1, u6ImmPred:$src2)), 1)), @@ -25,41 +52,43 @@ def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm, let Inst{13-8} = src2; } -let isCodeGenOnly = 0 in +let isAsmParserOnly = 1 in +def S2_asr_i_p_rnd_goodsyntax + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + "$dst = asrrnd($src1, #$src2)">; + def C4_fastcorner9 : T_LOGICAL_2OP<"fastcorner9", 0b000, 0, 0>, Requires<[HasV5T]> { let Inst{13,7,4} = 0b111; } -let isCodeGenOnly = 0 in def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>, Requires<[HasV5T]> { let Inst{20,13,7,4} = 0b1111; } -def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [ - SDTCisVT<0, f32>, - SDTCisPtrTy<1>]>; -def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>; +def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, + SDTCisPtrTy<1>]>; +def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>; -let isReMaterializable = 1, isMoveImm = 1 in +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst = CONST32(#$global)", - [(set (f32 IntRegs:$dst), - (HexagonFCONST32 tglobaladdr:$global))]>, - Requires<[HasV5T]>; + "$dst = CONST32(#$global)", + [(set F32:$dst, + (HexagonFCONST32 tglobaladdr:$global))]>, + Requires<[HasV5T]>; -let isReMaterializable = 1, isMoveImm = 1 in +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1), - "$dst = CONST64(#$src1)", - [(set DoubleRegs:$dst, fpimm:$src1)]>, - Requires<[HasV5T]>; + "$dst = CONST64(#$src1)", + [(set F64:$dst, fpimm:$src1)]>, + 
Requires<[HasV5T]>; -let isReMaterializable = 1, isMoveImm = 1 in +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1), - "$dst = CONST32(#$src1)", - [(set IntRegs:$dst, fpimm:$src1)]>, - Requires<[HasV5T]>; + "$dst = CONST32(#$src1)", + [(set F32:$dst, fpimm:$src1)]>, + Requires<[HasV5T]>; // Transfer immediate float. // Only works with single precision fp value. @@ -68,35 +97,33 @@ def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1), // Make sure that complexity is more than the CONST32 pattern in // HexagonInstrInfo.td patterns. let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1, -isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT, -isCodeGenOnly = 1 in + isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT, + isCodeGenOnly = 1 in def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1), - "$dst = #$src1", - [(set IntRegs:$dst, fpimm:$src1)]>, - Requires<[HasV5T]>; + "$dst = #$src1", + [(set F32:$dst, fpimm:$src1)]>, + Requires<[HasV5T]>; let isExtended = 1, opExtendable = 2, isPredicated = 1, -hasSideEffects = 0, validSubTargets = HasV5SubT in + hasSideEffects = 0, validSubTargets = HasV5SubT, isCodeGenOnly = 1 in def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, f32Ext:$src2), - "if ($src1) $dst = #$src2", - []>, - Requires<[HasV5T]>; + "if ($src1) $dst = #$src2", []>, + Requires<[HasV5T]>; -let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1, -hasSideEffects = 0, validSubTargets = HasV5SubT in +let isPseudo = 1, isExtended = 1, opExtendable = 2, isPredicated = 1, + isPredicatedFalse = 1, hasSideEffects = 0, validSubTargets = HasV5SubT in def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, f32Ext:$src2), - "if (!$src1) $dst =#$src2", - []>, - Requires<[HasV5T]>; + "if (!$src1) $dst = #$src2", []>, + Requires<[HasV5T]>; def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i64>]>; def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>; -let hasNewValue = 1, validSubTargets = HasV5SubT, isCodeGenOnly = 0 in +let hasNewValue = 1, validSubTargets = HasV5SubT in def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), "$Rd = popcount($Rss)", [(set I32:$Rd, (HexagonPOPCOUNT I64:$Rss))], "", S_2op_tc_2_SLOT23>, @@ -112,6 +139,14 @@ def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), let Inst{20-16} = Rss; } +defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>; +defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>; + +defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>; +defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>; +def: Storex_simple_pat<store, F32, S2_storeri_io>; +def: Storex_simple_pat<store, F64, S2_storerd_io>; + let isFP = 1, hasNewValue = 1, opNewValue = 0 in class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp> : MInst<(outs IntRegs:$Rd), @@ -134,27 +169,51 @@ class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp> let Inst{4-0} = Rd; } -let isCommutable = 1, isCodeGenOnly = 0 in { +let isCommutable = 1 in { def F2_sfadd : T_MInstFloat < "sfadd", 0b000, 0b000>; def F2_sfmpy : T_MInstFloat < "sfmpy", 0b010, 0b000>; } -let isCodeGenOnly = 0 in def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>; -let Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in { +def: Pat<(f32 (fadd F32:$src1, F32:$src2)), + (F2_sfadd F32:$src1, F32:$src2)>; + +def: 
Pat<(f32 (fsub F32:$src1, F32:$src2)), + (F2_sfsub F32:$src1, F32:$src2)>; + +def: Pat<(f32 (fmul F32:$src1, F32:$src2)), + (F2_sfmpy F32:$src1, F32:$src2)>; + +let Itinerary = M_tc_3x_SLOT23 in { def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>; def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>; } -let isCodeGenOnly = 0 in { +let AddedComplexity = 100, Predicates = [HasV5T] in { + def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)), + F32:$src1, F32:$src2)), + (F2_sfmin F32:$src1, F32:$src2)>; + + def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)), + F32:$src2, F32:$src1)), + (F2_sfmin F32:$src1, F32:$src2)>; + + def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)), + F32:$src1, F32:$src2)), + (F2_sfmax F32:$src1, F32:$src2)>; + + def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)), + F32:$src2, F32:$src1)), + (F2_sfmax F32:$src1, F32:$src2)>; +} + def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>; def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>; -} // F2_sfrecipa: Reciprocal approximation for division. let isPredicateLate = 1, isFP = 1, -hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in +hasSideEffects = 0, hasNewValue = 1 in def F2_sfrecipa: MInst < (outs IntRegs:$Rd, PredRegs:$Pe), (ins IntRegs:$Rs, IntRegs:$Rt), @@ -210,7 +269,6 @@ class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp> let Inst{27-21} = 0b0111111; } -let isCodeGenOnly = 0 in { def F2_dfcmpeq : T_fcmp64<"dfcmp.eq", setoeq, 0b000>; def F2_dfcmpgt : T_fcmp64<"dfcmp.gt", setogt, 0b001>; def F2_dfcmpge : T_fcmp64<"dfcmp.ge", setoge, 0b010>; @@ -220,6 +278,250 @@ def F2_sfcmpge : T_fcmp32<"sfcmp.ge", setoge, 0b000>; def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>; def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>; def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for ordered gt, ge, eq operations. +//===----------------------------------------------------------------------===// + +let Predicates = [HasV5T] in +multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), + (IntMI F32:$src1, F32:$src2)>; + // DoubleRegs + def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), + (DoubleMI F64:$src1, F64:$src2)>; +} + +defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>; +defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>; +defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations. 
+//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (IntMI F32:$src1, F32:$src2))>; + + // DoubleRegs + def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (DoubleMI F64:$src1, F64:$src2))>; +} + +defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>; +defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>; +defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for the following dags: +// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2)) +// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2) +// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2) +// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2)) +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI, + InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (C2_not (IntMI F32:$src1, F32:$src2))>; + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (IntMI F32:$src1, F32:$src2)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (IntMI F32:$src1, F32:$src2)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (C2_not (IntMI F32:$src1, F32:$src2))>; + + // DoubleRegs + def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src1, F64:$src2))>; + def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (DoubleMI F64:$src1, F64:$src2)>; + def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (DoubleMI F64:$src1, F64:$src2)>; + def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (C2_not (DoubleMI F64:$src1, F64:$src2))>; +} + +defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>; +defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>; +defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for the following dags: +// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1)) +// seteq(setolt(op1, op2), 1) -> setogt(op2, op1) +// setne(setolt(op1, op2), 0) -> setogt(op2, op1) +// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1)) +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI, + InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (C2_not (IntMI F32:$src2, F32:$src1))>; + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (IntMI F32:$src2, F32:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (IntMI F32:$src2, F32:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (C2_not (IntMI F32:$src2, F32:$src1))>; + + // DoubleRegs + def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src2, F64:$src1))>; + def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (DoubleMI F64:$src2, F64:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (DoubleMI F64:$src2, F64:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 
1)),
+            (C2_not (DoubleMI F64:$src2, F64:$src1))>;
+}
+
+defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>;
+defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>;
+
+
+// seto is the inverse of setuo; see http://llvm.org/docs/LangRef.html#i_fcmp
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (seto F32:$src1, F32:$src2)),
+           (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>;
+  def: Pat<(i1 (seto F32:$src1, fpimm:$src2)),
+           (C2_not (F2_sfcmpuo (TFRI_f fpimm:$src2), F32:$src1))>;
+  def: Pat<(i1 (seto F64:$src1, F64:$src2)),
+           (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>;
+  def: Pat<(i1 (seto F64:$src1, fpimm:$src2)),
+           (C2_not (F2_dfcmpuo (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
+}
+
+// Ordered lt.
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (setolt F32:$src1, F32:$src2)),
+           (F2_sfcmpgt F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setolt F32:$src1, fpimm:$src2)),
+           (F2_sfcmpgt (f32 (TFRI_f fpimm:$src2)), F32:$src1)>;
+  def: Pat<(i1 (setolt F64:$src1, F64:$src2)),
+           (F2_dfcmpgt F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setolt F64:$src1, fpimm:$src2)),
+           (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+}
+
+// Unordered lt.
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (setult F32:$src1, F32:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+                  (F2_sfcmpgt F32:$src2, F32:$src1))>;
+  def: Pat<(i1 (setult F32:$src1, fpimm:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
+                  (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1))>;
+  def: Pat<(i1 (setult F64:$src1, F64:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+                  (F2_dfcmpgt F64:$src2, F64:$src1))>;
+  def: Pat<(i1 (setult F64:$src1, fpimm:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
+                  (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
+}
+
+// Ordered le.
+let Predicates = [HasV5T] in {
+  // rs <= rt -> rt >= rs.
+  def: Pat<(i1 (setole F32:$src1, F32:$src2)),
+           (F2_sfcmpge F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setole F32:$src1, fpimm:$src2)),
+           (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>;
+
+  // Rss <= Rtt -> Rtt >= Rss.
+  def: Pat<(i1 (setole F64:$src1, F64:$src2)),
+           (F2_dfcmpge F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setole F64:$src1, fpimm:$src2)),
+           (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+}
+
+// Unordered le.
+let Predicates = [HasV5T] in {
+// rs <= rt -> rt >= rs.
+  def: Pat<(i1 (setule F32:$src1, F32:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+                  (F2_sfcmpge F32:$src2, F32:$src1))>;
+  def: Pat<(i1 (setule F32:$src1, fpimm:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
+                  (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1))>;
+  def: Pat<(i1 (setule F64:$src1, F64:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+                  (F2_dfcmpge F64:$src2, F64:$src1))>;
+  def: Pat<(i1 (setule F64:$src1, fpimm:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
+                  (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
+}
+
+// Ordered ne.
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (setone F32:$src1, F32:$src2)),
+           (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+  def: Pat<(i1 (setone F64:$src1, F64:$src2)),
+           (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+  def: Pat<(i1 (setone F32:$src1, fpimm:$src2)),
+           (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>;
+  def: Pat<(i1 (setone F64:$src1, fpimm:$src2)),
+           (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>;
+}
+
+// Unordered ne.
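// setune holds when the operands compare unordered OR unequal, hence the
// C2_or of the sfcmp.uo/dfcmp.uo test with the negated eq compare in the
// patterns below.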
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (setune F32:$src1, F32:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+                  (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>;
+  def: Pat<(i1 (setune F64:$src1, F64:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+                  (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>;
+  def: Pat<(i1 (setune F32:$src1, fpimm:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
+                  (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2))))>;
+  def: Pat<(i1 (setune F64:$src1, fpimm:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
+                  (C2_not (F2_dfcmpeq F64:$src1,
+                                      (CONST64_Float_Real fpimm:$src2))))>;
+}
+
+// Besides set[o|u][comparisons], we also need set[comparisons].
+let Predicates = [HasV5T] in {
+  // lt.
+  def: Pat<(i1 (setlt F32:$src1, F32:$src2)),
+           (F2_sfcmpgt F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setlt F32:$src1, fpimm:$src2)),
+           (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1)>;
+  def: Pat<(i1 (setlt F64:$src1, F64:$src2)),
+           (F2_dfcmpgt F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setlt F64:$src1, fpimm:$src2)),
+           (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+
+  // le.
+  // rs <= rt -> rt >= rs.
+  def: Pat<(i1 (setle F32:$src1, F32:$src2)),
+           (F2_sfcmpge F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setle F32:$src1, fpimm:$src2)),
+           (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>;
+
+  // Rss <= Rtt -> Rtt >= Rss.
+  def: Pat<(i1 (setle F64:$src1, F64:$src2)),
+           (F2_dfcmpge F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setle F64:$src1, fpimm:$src2)),
+           (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+
+  // ne.
+  def: Pat<(i1 (setne F32:$src1, F32:$src2)),
+           (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+  def: Pat<(i1 (setne F64:$src1, F64:$src2)),
+           (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+  def: Pat<(i1 (setne F32:$src1, fpimm:$src2)),
+           (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>;
+  def: Pat<(i1 (setne F64:$src1, fpimm:$src2)),
+           (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>;
+}
 
 // F2 convert template classes:
@@ -302,7 +604,6 @@ class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp,
 }
 
 // Convert single precision to double precision and vice-versa.
-let isCodeGenOnly = 0 in {
 def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000,
                                        fextend, F64, F32>;
@@ -364,10 +665,9 @@ let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in {
   def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000,
                                        fp_to_sint, I32, F32>;
 }
-}
 
 // Fix up radicand.
-let isFP = 1, hasNewValue = 1, isCodeGenOnly = 0 in
+let isFP = 1, hasNewValue = 1 in
 def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
   "$Rd = sffixupr($Rs)", [], "" , S_2op_tc_3or4x_SLOT23>,
   Requires<[HasV5T]> {
@@ -382,6 +682,14 @@ def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
   let Inst{4-0} = Rd;
 }
 
+// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
+let Predicates = [HasV5T] in {
+  def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>;
+  def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>;
+  def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>;
+  def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>;
+}
+
 // F2_sffma: Floating-point fused multiply add.
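// (Note the operand rotation in the fma pattern further below: ISD fma(a, b, c)
// computes a*b + c, and the accumulator c is tied to the destination register,
// so it becomes the first operand of F2_sffma, i.e. "Rx += sfmpy(Rs, Rt)".)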
let isFP = 1, hasNewValue = 1 in class T_sfmpy_acc <bit isSub, bit isLib> @@ -406,15 +714,16 @@ class T_sfmpy_acc <bit isSub, bit isLib> let Inst{4-0} = Rx; } -let isCodeGenOnly = 0 in { def F2_sffma: T_sfmpy_acc <0, 0>; def F2_sffms: T_sfmpy_acc <1, 0>; def F2_sffma_lib: T_sfmpy_acc <0, 1>; def F2_sffms_lib: T_sfmpy_acc <1, 1>; -} + +def : Pat <(f32 (fma F32:$src2, F32:$src3, F32:$src1)), + (F2_sffma F32:$src1, F32:$src2, F32:$src3)>; // Floating-point fused multiply add w/ additional scaling (2**pu). -let isFP = 1, hasNewValue = 1, isCodeGenOnly = 0 in +let isFP = 1, hasNewValue = 1 in def F2_sffma_sc: MInst < (outs IntRegs:$Rx), (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu), @@ -437,11 +746,147 @@ def F2_sffma_sc: MInst < let Inst{4-0} = Rx; } +let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 3, + isPseudo = 1, InputType = "imm" in +def MUX_ir_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, f32Ext:$src3), + "$dst = mux($src1, $src2, #$src3)", + [(set F32:$dst, (f32 (select I1:$src1, F32:$src2, fpimm:$src3)))]>, + Requires<[HasV5T]>; + +let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 2, + isPseudo = 1, InputType = "imm" in +def MUX_ri_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32Ext:$src2, IntRegs:$src3), + "$dst = mux($src1, #$src2, $src3)", + [(set F32:$dst, (f32 (select I1:$src1, fpimm:$src2, F32:$src3)))]>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$src1, F32:$src2, F32:$src3), + (C2_mux I1:$src1, F32:$src2, F32:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4), + (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$src1, F64:$src2, F64:$src3), + (C2_vmux I1:$src1, F64:$src2, F64:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4), + (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>, + Requires<[HasV5T]>; + +// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) +// => r0 = MUX_ir_f(p0, #i, r1) +def: Pat<(select (not I1:$src1), fpimm:$src2, F32:$src3), + (MUX_ir_f I1:$src1, F32:$src3, fpimm:$src2)>, + Requires<[HasV5T]>; + +// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) +// => r0 = MUX_ri_f(p0, r1, #i) +def: Pat<(select (not I1:$src1), F32:$src2, fpimm:$src3), + (MUX_ri_f I1:$src1, fpimm:$src3, F32:$src2)>, + Requires<[HasV5T]>; + +def: Pat<(i32 (fp_to_sint F64:$src1)), + (LoReg (F2_conv_df2d_chop F64:$src1))>, + Requires<[HasV5T]>; + +//===----------------------------------------------------------------------===// +// :natural forms of vasrh and vasrhub insns +//===----------------------------------------------------------------------===// +// S5_asrhub_rnd_sat: Vector arithmetic shift right by immediate with round, +// saturate, and pack. 
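// (Informal reading of the mnemonic: each halfword of Rss is shifted right
// arithmetically by #u4, optionally rounded, saturated to an unsigned byte,
// and the four bytes are packed into Rd.)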
+let Defs = [USR_OVF], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_ASRHUB<bit isSat> + : SInst <(outs IntRegs:$Rd), + (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rd = vasrhub($Rss, #$u4):"#!if(isSat, "sat", "raw"), + [], "", S_2op_tc_2_SLOT23>, + Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rss; + bits<4> u4; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1000011; + let Inst{20-16} = Rss; + let Inst{13-12} = 0b00; + let Inst{11-8} = u4; + let Inst{7-6} = 0b10; + let Inst{5} = isSat; + let Inst{4-0} = Rd; + } + +def S5_asrhub_rnd_sat : T_ASRHUB <0>; +def S5_asrhub_sat : T_ASRHUB <1>; + +let isAsmParserOnly = 1 in +def S5_asrhub_rnd_sat_goodsyntax + : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rd = vasrhub($Rss, #$u4):rnd:sat">, Requires<[HasV5T]>; + +// S5_vasrhrnd: Vector arithmetic shift right by immediate with round. +let hasSideEffects = 0 in +def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rdd = vasrh($Rss, #$u4):raw">, + Requires<[HasV5T]> { + bits<5> Rdd; + bits<5> Rss; + bits<4> u4; + + let IClass = 0b1000; + + let Inst{27-21} = 0b0000001; + let Inst{20-16} = Rss; + let Inst{13-12} = 0b00; + let Inst{11-8} = u4; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rdd; + } + +let isAsmParserOnly = 1 in +def S5_vasrhrnd_goodsyntax + : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rdd = vasrh($Rss,#$u4):rnd">, Requires<[HasV5T]>; + +// Floating point reciprocal square root approximation +let Uses = [USR], isPredicateLate = 1, isFP = 1, + hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, + validSubTargets = HasV5SubT in +def F2_sfinvsqrta: SInst < + (outs IntRegs:$Rd, PredRegs:$Pe), + (ins IntRegs:$Rs), + "$Rd, $Pe = sfinvsqrta($Rs)" > , + Requires<[HasV5T]> { + bits<5> Rd; + bits<2> Pe; + bits<5> Rs; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1011111; + let Inst{20-16} = Rs; + let Inst{7} = 0b0; + let Inst{6-5} = Pe; + let Inst{4-0} = Rd; + } + +// Complex multiply 32x16 +let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in { + def M4_cmpyi_whc : T_S3op_8<"cmpyiwh", 0b101, 1, 1, 1, 1>; + def M4_cmpyr_whc : T_S3op_8<"cmpyrwh", 0b111, 1, 1, 1, 1>; +} + // Classify floating-point value -let isFP = 1, isCodeGenOnly = 0 in +let isFP = 1 in def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>; -let isFP = 1, isCodeGenOnly = 0 in +let isFP = 1 in def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5), "$Pd = dfclass($Rss, #$u5)", [], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> { @@ -459,7 +904,6 @@ def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5), } // Instructions to create floating point constant -let hasNewValue = 1, opNewValue = 0 in class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg> : ALU64Inst<(outs RC:$dst), (ins u10Imm:$src), "$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"), @@ -476,546 +920,13 @@ class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg> let Inst{4-0} = dst; } -let isCodeGenOnly = 0 in { +let hasNewValue = 1, opNewValue = 0 in { def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>; def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>; -def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>; -def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>; } -// Convert single precision to double precision and vice-versa. 
-def CONVERT_sf2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2df($src)", - [(set DoubleRegs:$dst, (fextend IntRegs:$src))]>, - Requires<[HasV5T]>; - -def CONVERT_df2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2sf($src)", - [(set IntRegs:$dst, (fround DoubleRegs:$src))]>, - Requires<[HasV5T]>; - - -// Load. -def LDrid_f : LDInst<(outs DoubleRegs:$dst), - (ins MEMri:$addr), - "$dst = memd($addr)", - [(set DoubleRegs:$dst, (f64 (load ADDRriS11_3:$addr)))]>, - Requires<[HasV5T]>; - - -let AddedComplexity = 20 in -def LDrid_indexed_f : LDInst<(outs DoubleRegs:$dst), - (ins IntRegs:$src1, s11_3Imm:$offset), - "$dst = memd($src1+#$offset)", - [(set DoubleRegs:$dst, (f64 (load (add IntRegs:$src1, - s11_3ImmPred:$offset))))]>, - Requires<[HasV5T]>; - -def LDriw_f : LDInst<(outs IntRegs:$dst), - (ins MEMri:$addr), "$dst = memw($addr)", - [(set IntRegs:$dst, (f32 (load ADDRriS11_2:$addr)))]>, - Requires<[HasV5T]>; - - -let AddedComplexity = 20 in -def LDriw_indexed_f : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s11_2Imm:$offset), - "$dst = memw($src1+#$offset)", - [(set IntRegs:$dst, (f32 (load (add IntRegs:$src1, - s11_2ImmPred:$offset))))]>, - Requires<[HasV5T]>; - -// Store. -def STriw_f : STInst<(outs), - (ins MEMri:$addr, IntRegs:$src1), - "memw($addr) = $src1", - [(store (f32 IntRegs:$src1), ADDRriS11_2:$addr)]>, - Requires<[HasV5T]>; - -let AddedComplexity = 10 in -def STriw_indexed_f : STInst<(outs), - (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3), - "memw($src1+#$src2) = $src3", - [(store (f32 IntRegs:$src3), - (add IntRegs:$src1, s11_2ImmPred:$src2))]>, - Requires<[HasV5T]>; - -def STrid_f : STInst<(outs), - (ins MEMri:$addr, DoubleRegs:$src1), - "memd($addr) = $src1", - [(store (f64 DoubleRegs:$src1), ADDRriS11_2:$addr)]>, - Requires<[HasV5T]>; - -// Indexed store double word. -let AddedComplexity = 10 in -def STrid_indexed_f : STInst<(outs), - (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3), - "memd($src1+#$src2) = $src3", - [(store (f64 DoubleRegs:$src3), - (add IntRegs:$src1, s11_3ImmPred:$src2))]>, - Requires<[HasV5T]>; - - -// Add -let isCommutable = 1 in -def fADD_rr : ALU64_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = sfadd($src1, $src2)", - [(set IntRegs:$dst, (fadd IntRegs:$src1, IntRegs:$src2))]>, - Requires<[HasV5T]>; - -let isCommutable = 1 in -def fADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = dfadd($src1, $src2)", - [(set DoubleRegs:$dst, (fadd DoubleRegs:$src1, - DoubleRegs:$src2))]>, - Requires<[HasV5T]>; - -def fSUB_rr : ALU64_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = sfsub($src1, $src2)", - [(set IntRegs:$dst, (fsub IntRegs:$src1, IntRegs:$src2))]>, - Requires<[HasV5T]>; - -def fSUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = dfsub($src1, $src2)", - [(set DoubleRegs:$dst, (fsub DoubleRegs:$src1, - DoubleRegs:$src2))]>, - Requires<[HasV5T]>; - -let isCommutable = 1 in -def fMUL_rr : ALU64_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = sfmpy($src1, $src2)", - [(set IntRegs:$dst, (fmul IntRegs:$src1, IntRegs:$src2))]>, - Requires<[HasV5T]>; - -let isCommutable = 1 in -def fMUL64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = dfmpy($src1, $src2)", - [(set DoubleRegs:$dst, (fmul DoubleRegs:$src1, - DoubleRegs:$src2))]>, - Requires<[HasV5T]>; - -// Compare. 
-let isCompare = 1 in { -multiclass FCMP64_rr<string OpcStr, PatFrag OpNode> { - def _rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set PredRegs:$dst, - (OpNode (f64 DoubleRegs:$b), (f64 DoubleRegs:$c)))]>, - Requires<[HasV5T]>; -} - -multiclass FCMP32_rr<string OpcStr, PatFrag OpNode> { - def _rr : ALU64_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set PredRegs:$dst, - (OpNode (f32 IntRegs:$b), (f32 IntRegs:$c)))]>, - Requires<[HasV5T]>; -} -} - -defm FCMPOEQ64 : FCMP64_rr<"dfcmp.eq", setoeq>; -defm FCMPUEQ64 : FCMP64_rr<"dfcmp.eq", setueq>; -defm FCMPOGT64 : FCMP64_rr<"dfcmp.gt", setogt>; -defm FCMPUGT64 : FCMP64_rr<"dfcmp.gt", setugt>; -defm FCMPOGE64 : FCMP64_rr<"dfcmp.ge", setoge>; -defm FCMPUGE64 : FCMP64_rr<"dfcmp.ge", setuge>; - -defm FCMPOEQ32 : FCMP32_rr<"sfcmp.eq", setoeq>; -defm FCMPUEQ32 : FCMP32_rr<"sfcmp.eq", setueq>; -defm FCMPOGT32 : FCMP32_rr<"sfcmp.gt", setogt>; -defm FCMPUGT32 : FCMP32_rr<"sfcmp.gt", setugt>; -defm FCMPOGE32 : FCMP32_rr<"sfcmp.ge", setoge>; -defm FCMPUGE32 : FCMP32_rr<"sfcmp.ge", setuge>; - -// olt. -def : Pat <(i1 (setolt (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (i1 (FCMPOGT32_rr IntRegs:$src2, IntRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setolt (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (FCMPOGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (i1 (FCMPOGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (FCMPOGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)), - (f64 DoubleRegs:$src1)))>, - Requires<[HasV5T]>; - -// gt. -def : Pat <(i1 (setugt (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (FCMPUGT64_rr (f64 DoubleRegs:$src1), - (f64 (CONST64_Float_Real fpimm:$src2))))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setugt (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (FCMPUGT32_rr (f32 IntRegs:$src1), (f32 (TFRI_f fpimm:$src2))))>, - Requires<[HasV5T]>; - -// ult. -def : Pat <(i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (i1 (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setult (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (FCMPUGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (i1 (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (FCMPUGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)), - (f64 DoubleRegs:$src1)))>, - Requires<[HasV5T]>; - -// le. -// rs <= rt -> rt >= rs. -def : Pat<(i1 (setole (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (i1 (FCMPOGE32_rr IntRegs:$src2, IntRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setole (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (FCMPOGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>, - Requires<[HasV5T]>; - - -// Rss <= Rtt -> Rtt >= Rss. -def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (i1 (FCMPOGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (FCMPOGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)), - DoubleRegs:$src1))>, - Requires<[HasV5T]>; - -// rs <= rt -> rt >= rs. 
-def : Pat<(i1 (setule (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (i1 (FCMPUGE32_rr IntRegs:$src2, IntRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setule (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (FCMPUGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>, - Requires<[HasV5T]>; - -// Rss <= Rtt -> Rtt >= Rss. -def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (i1 (FCMPUGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (FCMPUGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)), - DoubleRegs:$src1))>, - Requires<[HasV5T]>; - -// ne. -def : Pat<(i1 (setone (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (i1 (C2_not (FCMPOEQ32_rr IntRegs:$src1, IntRegs:$src2)))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (i1 (C2_not (FCMPOEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setune (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (i1 (C2_not (FCMPUEQ32_rr IntRegs:$src1, IntRegs:$src2)))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (i1 (C2_not (FCMPUEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setone (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (C2_not (FCMPOEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (C2_not (FCMPOEQ64_rr DoubleRegs:$src1, - (f64 (CONST64_Float_Real fpimm:$src2)))))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setune (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (C2_not (FCMPUEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (C2_not (FCMPUEQ64_rr DoubleRegs:$src1, - (f64 (CONST64_Float_Real fpimm:$src2)))))>, - Requires<[HasV5T]>; - -// Convert Integer to Floating Point. 
-def CONVERT_d2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_d2sf($src)", - [(set (f32 IntRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_ud2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_ud2sf($src)", - [(set (f32 IntRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_uw2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - "$dst = convert_uw2sf($src)", - [(set (f32 IntRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_w2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - "$dst = convert_w2sf($src)", - [(set (f32 IntRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_d2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_d2df($src)", - [(set (f64 DoubleRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_ud2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_ud2df($src)", - [(set (f64 DoubleRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_uw2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_uw2df($src)", - [(set (f64 DoubleRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_w2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_w2df($src)", - [(set (f64 DoubleRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -// Convert Floating Point to Integer - default. -def CONVERT_df2uw : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2uw($src):chop", - [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_df2w : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2w($src):chop", - [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_sf2uw : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2uw($src):chop", - [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_sf2w : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2w($src):chop", - [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_df2d : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2d($src):chop", - [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_df2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2ud($src):chop", - [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_sf2d : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2d($src):chop", - [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_sf2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2ud($src):chop", - [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -// Convert Floating Point to Integer: non-chopped. 
-let AddedComplexity = 20 in -def CONVERT_df2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2uw($src)", - [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_df2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2w($src)", - [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_sf2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2uw($src)", - [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_sf2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2w($src)", - [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_df2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2d($src)", - [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_df2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2ud($src)", - [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_sf2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2d($src)", - [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_sf2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2ud($src)", - [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - - - -// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. -def : Pat <(i32 (bitconvert (f32 IntRegs:$src))), - (i32 (A2_tfr IntRegs:$src))>, - Requires<[HasV5T]>; - -def : Pat <(f32 (bitconvert (i32 IntRegs:$src))), - (f32 (A2_tfr IntRegs:$src))>, - Requires<[HasV5T]>; - -def : Pat <(i64 (bitconvert (f64 DoubleRegs:$src))), - (i64 (A2_tfrp DoubleRegs:$src))>, - Requires<[HasV5T]>; - -def : Pat <(f64 (bitconvert (i64 DoubleRegs:$src))), - (f64 (A2_tfrp DoubleRegs:$src))>, - Requires<[HasV5T]>; - -def FMADD_sp : ALU64_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "$dst += sfmpy($src2, $src3)", - [(set (f32 IntRegs:$dst), - (fma IntRegs:$src2, IntRegs:$src3, IntRegs:$src1))], - "$src1 = $dst">, - Requires<[HasV5T]>; - - -// Floating point max/min. - -let AddedComplexity = 100 in -def FMAX_sp : ALU64_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = sfmax($src1, $src2)", - [(set IntRegs:$dst, (f32 (select (i1 (setolt IntRegs:$src2, - IntRegs:$src1)), - IntRegs:$src1, - IntRegs:$src2)))]>, - Requires<[HasV5T]>; - -let AddedComplexity = 100 in -def FMIN_sp : ALU64_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = sfmin($src1, $src2)", - [(set IntRegs:$dst, (f32 (select (i1 (setogt IntRegs:$src2, - IntRegs:$src1)), - IntRegs:$src1, - IntRegs:$src2)))]>, - Requires<[HasV5T]>; - -// Pseudo instruction to encode a set of conditional transfers. -// This instruction is used instead of a mux and trades-off codesize -// for performance. 
We conduct this transformation optimistically in -// the hope that these instructions get promoted to dot-new transfers. -let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_rr_f : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, - IntRegs:$src2, - IntRegs:$src3), - "Error; should not emit", - [(set IntRegs:$dst, (f32 (select PredRegs:$src1, - IntRegs:$src2, - IntRegs:$src3)))]>, - Requires<[HasV5T]>; - -let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_rr64_f : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, - DoubleRegs:$src2, - DoubleRegs:$src3), - "Error; should not emit", - [(set DoubleRegs:$dst, (f64 (select PredRegs:$src1, - DoubleRegs:$src2, - DoubleRegs:$src3)))]>, - Requires<[HasV5T]>; - - - -let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_ri_f : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, f32imm:$src3), - "Error; should not emit", - [(set IntRegs:$dst, - (f32 (select PredRegs:$src1, IntRegs:$src2, fpimm:$src3)))]>, - Requires<[HasV5T]>; - -let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_ir_f : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, f32imm:$src2, IntRegs:$src3), - "Error; should not emit", - [(set IntRegs:$dst, - (f32 (select PredRegs:$src1, fpimm:$src2, IntRegs:$src3)))]>, - Requires<[HasV5T]>; - -let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_ii_f : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, f32imm:$src2, f32imm:$src3), - "Error; should not emit", - [(set IntRegs:$dst, (f32 (select PredRegs:$src1, - fpimm:$src2, - fpimm:$src3)))]>, - Requires<[HasV5T]>; - - -def : Pat <(select (i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (f32 IntRegs:$src3), - (f32 IntRegs:$src4)), - (TFR_condset_rr_f (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1), IntRegs:$src4, - IntRegs:$src3)>, Requires<[HasV5T]>; - -def : Pat <(select (i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (f64 DoubleRegs:$src3), - (f64 DoubleRegs:$src4)), - (TFR_condset_rr64_f (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1), - DoubleRegs:$src4, DoubleRegs:$src3)>, Requires<[HasV5T]>; - -// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). 
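As a scalar model of the mapping just stated (a minimal C++ sketch; the helper name is ours, not commit code): selecting with a negated predicate is the same as selecting with the operands swapped, which is why the pnot can be folded away.

  #include <cassert>

  // mux(p, a, b) models a predicated transfer: p ? a : b.
  static float mux(bool p, float a, float b) { return p ? a : b; }

  int main() {
    bool p = false; float i = 1.0f, j = 2.0f;
    // mux(!p, i, j) == mux(p, j, i) holds for every p, i, j.
    assert(mux(!p, i, j) == mux(p, j, i));
  }

The removed patterns below perform exactly this swap.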
-def : Pat <(select (not PredRegs:$src1), fpimm:$src2, fpimm:$src3), - (TFR_condset_ii_f PredRegs:$src1, fpimm:$src3, fpimm:$src2)>; - -// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) -// => r0 = TFR_condset_ri(p0, r1, #i) -def : Pat <(select (not PredRegs:$src1), fpimm:$src2, IntRegs:$src3), - (TFR_condset_ri_f PredRegs:$src1, IntRegs:$src3, fpimm:$src2)>; - -// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) -// => r0 = TFR_condset_ir(p0, #i, r1) -def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, fpimm:$src3), - (TFR_condset_ir_f PredRegs:$src1, fpimm:$src3, IntRegs:$src2)>; - -def : Pat <(i32 (fp_to_sint (f64 DoubleRegs:$src1))), - (i32 (EXTRACT_SUBREG (i64 (CONVERT_df2d (f64 DoubleRegs:$src1))), subreg_loreg))>, - Requires<[HasV5T]>; +def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>; +def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>; def : Pat <(fabs (f32 IntRegs:$src1)), (S2_clrbit_i (f32 IntRegs:$src1), 31)>, @@ -1024,13 +935,3 @@ def : Pat <(fabs (f32 IntRegs:$src1)), def : Pat <(fneg (f32 IntRegs:$src1)), (S2_togglebit_i (f32 IntRegs:$src1), 31)>, Requires<[HasV5T]>; - -/* -def : Pat <(fabs (f64 DoubleRegs:$src1)), - (S2_clrbit_i (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>, - Requires<[HasV5T]>; - -def : Pat <(fabs (f64 DoubleRegs:$src1)), - (S2_clrbit_i (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>, - Requires<[HasV5T]>; - */ diff --git a/lib/Target/Hexagon/HexagonInstrInfoVector.td b/lib/Target/Hexagon/HexagonInstrInfoVector.td new file mode 100644 index 000000000000..f4fb946d5bad --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrInfoVector.td @@ -0,0 +1,483 @@ +//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon Vector instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; +def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; +def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; +def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; +def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; +def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; +def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; +def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; + + +multiclass bitconvert_32<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a IntRegs:$src))), + (b IntRegs:$src)>; + def : Pat <(a (bitconvert (b IntRegs:$src))), + (a IntRegs:$src)>; +} + +multiclass bitconvert_64<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a DoubleRegs:$src))), + (b DoubleRegs:$src)>; + def : Pat <(a (bitconvert (b DoubleRegs:$src))), + (a DoubleRegs:$src)>; +} + +// Bit convert vector types. +defm : bitconvert_32<v4i8, i32>; +defm : bitconvert_32<v2i16, i32>; +defm : bitconvert_32<v2i16, v4i8>; + +defm : bitconvert_64<v8i8, i64>; +defm : bitconvert_64<v4i16, i64>; +defm : bitconvert_64<v2i32, i64>; +defm : bitconvert_64<v8i8, v4i16>; +defm : bitconvert_64<v8i8, v2i32>; +defm : bitconvert_64<v4i16, v2i32>; + + +// Vector shift support. Vector shifting in Hexagon is rather different +// from internal representation of LLVM. 
+// LLVM assumes all shifts (in the vector case) will have the form
+// <VT> = SHL/SRA/SRL <VT> by <VT>
+// while Hexagon has the following format:
+// <VT> = SHL/SRA/SRL <VT> by <IT/i32>
+// As a result, special care is needed to guarantee correctness and
+// performance.
+class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
+  : S_2OpInstImm<Str, MajOp, MinOp, u4Imm,
+      [(set (v4i16 DoubleRegs:$dst),
+            (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> {
+  bits<4> src2;
+  let Inst{11-8} = src2;
+}
+
+class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
+  : S_2OpInstImm<Str, MajOp, MinOp, u5Imm,
+      [(set (v2i32 DoubleRegs:$dst),
+            (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> {
+  bits<5> src2;
+  let Inst{12-8} = src2;
+}
+
+def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+          (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
+
+def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+          (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
+
+def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
+def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
+def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;
+
+def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
+def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
+def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;
+
+
+def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
+def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
+
+// Replicate the low 8 bits of the 32-bit input register into each of the
+// four bytes of the 32-bit destination register.
+def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
+
+// Replicate the low 16 bits of the 32-bit input register into each of the
+// four halfwords of the 64-bit destination register.
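The two splat comments above have a simple scalar reading; a minimal C++ sketch of the semantics (illustrative only, the helper names are ours, not part of this commit):

  #include <cstdint>

  // S2_vsplatrb-like: copy the low byte into all four bytes of a word.
  static uint32_t splat_rb(uint32_t rs) {
    return (rs & 0xFFu) * 0x01010101u;   // 0x000000AB -> 0xABABABAB
  }

  // S2_vsplatrh-like: copy the low halfword into all four halfwords of a
  // double word.
  static uint64_t splat_rh(uint32_t rs) {
    return (uint64_t)(rs & 0xFFFFu) * 0x0001000100010001ull;
  }

The VSPLATH selection pattern follows; the VSPLATB pattern appears just above.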
+def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
+
+
+class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
+  : Pat <(Op Type:$Rss, Type:$Rtt),
+         (MI Type:$Rss, Type:$Rtt)>;
+
+def: VArith_pat <A2_vaddub, add, V8I8>;
+def: VArith_pat <A2_vaddh, add, V4I16>;
+def: VArith_pat <A2_vaddw, add, V2I32>;
+def: VArith_pat <A2_vsubub, sub, V8I8>;
+def: VArith_pat <A2_vsubh, sub, V4I16>;
+def: VArith_pat <A2_vsubw, sub, V2I32>;
+
+def: VArith_pat <A2_and, and, V2I16>;
+def: VArith_pat <A2_xor, xor, V2I16>;
+def: VArith_pat <A2_or, or, V2I16>;
+
+def: VArith_pat <A2_andp, and, V8I8>;
+def: VArith_pat <A2_andp, and, V4I16>;
+def: VArith_pat <A2_andp, and, V2I32>;
+def: VArith_pat <A2_orp, or, V8I8>;
+def: VArith_pat <A2_orp, or, V4I16>;
+def: VArith_pat <A2_orp, or, V2I32>;
+def: VArith_pat <A2_xorp, xor, V8I8>;
+def: VArith_pat <A2_xorp, xor, V4I16>;
+def: VArith_pat <A2_xorp, xor, V2I32>;
+
+def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+                                                    (i32 u5ImmPred:$c))))),
+         (S2_asr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+                                                    (i32 u5ImmPred:$c))))),
+         (S2_lsr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+                                                    (i32 u5ImmPred:$c))))),
+         (S2_asl_i_vw V2I32:$b, imm:$c)>;
+
+def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+         (S2_asr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+         (S2_lsr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+         (S2_asl_i_vh V4I16:$b, imm:$c)>;
+
+
+def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
+  [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
+def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
+  [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
+
+def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
+
+def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)),
+         (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)),
+         (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)),
+         (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)),
+         (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)),
+         (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)),
+         (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
+
+// Vector shift words by register
+def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
+def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
+def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>;
+def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>;
+
+// Vector shift halfwords by register
+def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>;
+def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
+def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
+def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;
+
+class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+  : Pat <(Op Value:$Rs, I32:$Rt),
+         (MI Value:$Rs, I32:$Rt)>;
+
+def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
+def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
+def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
+def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
+def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
+def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
+
+
+def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
+  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
+def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
+  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
+def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
+  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
+
+def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
+
+
+class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+  : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
+         (MI Value:$Rs, Value:$Rt)>;
+
+def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
+def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
+def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
+
+def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
+
+def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
+
+
+class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
+  : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
+         (MI InVal:$Rs, InVal:$Rt)>;
+
+def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
+
+def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
+
+
+// Hexagon doesn't have a vector multiply with C semantics.
+// Instead, generate a pseudo instruction that gets expanded into two
+// scalar MPYI instructions.
+// This is expanded by ExpandPostRAPseudos.
+let isPseudo = 1 in
+def VMULW : PseudoM<(outs DoubleRegs:$Rd),
+      (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
+  ".error \"Should never try to emit VMULW\"",
+  [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;
+
+let isPseudo = 1 in
+def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd),
+      (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
+  ".error \"Should never try to emit VMULW_ACC\"",
+  [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
+  "$Rd = $Rx">;
+
+// Adds two v4i8: Hexagon does not have an insn for this one, so we
+// use the double add v8i8, and use only the low part of the result.
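In scalar terms, the widening trick just described works as follows (a minimal C++ sketch; the helper names are ours, not commit code): zero-extending both 32-bit operands to 64 bits puts zeros in the upper four byte lanes, so a full v8i8 add leaves the low four lanes exactly as a v4i8 add would.

  #include <cstdint>

  // A2_vaddub-like: eight independent byte adds with wrap-around.
  static uint64_t vaddub(uint64_t a, uint64_t b) {
    uint64_t r = 0;
    for (int i = 0; i < 64; i += 8)
      r |= (((a >> i) + (b >> i)) & 0xFFu) << i;   // each lane mod 256
    return r;
  }

  // v4i8 add: widen, add all eight lanes, keep only the low word.
  static uint32_t add_v4i8(uint32_t rs, uint32_t rt) {
    return (uint32_t)vaddub(rs, rt);
  }

The pattern implementing this, and the analogous subtract, follows.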
+def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+         (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+// Subtract two v4i8: Hexagon does not have an insn for this one, so we
+// use the double sub v8i8, and use only the low part of the result.
+def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+         (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// No 32-bit vector mux.
+//
+def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
+         (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
+         (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// 64-bit vector mux.
+//
+def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
+         (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
+def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
+         (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
+def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
+         (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
+
+//
+// No 32-bit vector compare.
+//
+def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
+         (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
+         (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
+         (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
+         (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
+         (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
+         (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+
+class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
+                    ValueType CmpTy>
+  : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
+        (InvMI Value:$Rt, Value:$Rs)>;
+
+// Map from a compare operation to the corresponding instruction with the
+// order of operands reversed, e.g. x > y --> cmp.lt(y,x).
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;
+
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
+
+// Map from vcmpne(Rss) -> !vcmpew(Rss).
+// rs != rt -> !(rs == rt).
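As a scalar model of this rewrite (illustrative C++ only; the names are ours): there is no not-equal vector compare, so equality is computed per word and the predicate is complemented.

  #include <cstdint>

  // A2_vcmpweq-like model, one predicate bit per 32-bit lane.
  static uint8_t vcmpweq(uint64_t rs, uint64_t rt) {
    uint8_t p = 0;
    if ((uint32_t)rs == (uint32_t)rt)                 p |= 1;  // low word
    if ((uint32_t)(rs >> 32) == (uint32_t)(rt >> 32)) p |= 2;  // high word
    return p;
  }

  // setne: rs != rt  <=>  !(rs == rt), so complement the equality bits.
  static uint8_t vcmpwne(uint64_t rs, uint64_t rt) {
    return (uint8_t)(~vcmpweq(rs, rt) & 0x3u);
  }

The corresponding selection pattern follows.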
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
+         (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;
+
+
+// Truncate: from vector B copy all 'E'ven 'B'yte elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
+def: Pat<(v4i8 (trunc V4I16:$Rs)),
+         (S2_vtrunehb V4I16:$Rs)>;
+
+// Truncate: from vector B copy all 'O'dd 'B'yte elements:
+// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
+// S2_vtrunohb
+
+// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
+// S2_vtruneh
+
+def: Pat<(v2i16 (trunc V2I32:$Rs)),
+         (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
+
+
+def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
+def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
+
+def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
+def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
+
+def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
+
+// Sign-extends a v2i8 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
+         (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
+
+// Sign-extends a v2i16 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
+         (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
+
+
+// Multiplies two v2i16 and returns a v2i32. We use the saturating multiply
+// here, as Hexagon does not provide a non-saturating vector multiply, and
+// saturation does not impact a result that is held in double the precision
+// of the operands.
+
+// Multiplies two v2i16 vectors: as Hexagon does not have a multiply with
+// C semantics for this one, this pattern uses the halfword multiply vmpyh,
+// which takes two v2i16 and returns a v2i32. The result is then truncated
+// back into a v2i16 to simulate the wrap-around semantics of unsigned
+// multiplication in C.
+def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
+                      (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
+
+def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
+         (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
+                             (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
+
+// Multiplies two v4i16 vectors.
+def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
+         (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
+                      (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
+
+def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
+  (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
+               (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
+
+// Multiplies two v4i8 vectors.
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+         (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
+     Requires<[HasV5T]>;
+
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+         (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
+
+// Multiplies two v8i8 vectors.
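The multiply patterns above all follow one recipe, easy to state in scalar form (a minimal C++ sketch; the helper name is ours, not commit code): compute each product in double the lane width, then keep the low half of every lane, which is exactly C's wrap-around (modulo) semantics.

  #include <cstdint>

  // v2i16 multiply with C (modulo 2^16) semantics: widen each lane,
  // multiply, truncate back. A 16x16 product always fits in 32 bits, so
  // the saturating vmpyh never actually saturates here.
  static uint32_t mul_v2i16(uint32_t rs, uint32_t rt) {
    uint32_t lo = (uint16_t)((rs & 0xFFFFu) * (rt & 0xFFFFu));
    uint32_t hi = (uint16_t)((rs >> 16) * (rt >> 16));
    return (hi << 16) | lo;
  }

The v8i8 patterns below apply the same recipe independently to each 32-bit half of the vector.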
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>, + Requires<[HasV5T]>; + +def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; + + +class shuffler<SDNode Op, string Str> + : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c), + "$a = " # Str # "($b, $c)", + [(set (i64 DoubleRegs:$a), + (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))], + "", S_3op_tc_1_SLOT23>; + +def SDTHexagonBinOp64 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>; + +def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>; +def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>; +def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>; +def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>; + +class ShufflePat<InstHexagon MI, SDNode Op> + : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)), + (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b +def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>; + +// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b +def: ShufflePat<S2_shuffob, HexagonSHUFFOB>; + +// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h +def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>; + +// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h +def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>; + + +// Truncated store from v4i16 to v4i8. +def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), + [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>; + +// Truncated store from v2i32 to v2i16. +def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), + [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>; + +def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt), + (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs), + (LoReg $Rs))))>; + +def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt), + (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>; + + +// Zero and sign extended load from v2i8 into v2i16. +def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), + [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; + +def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), + [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; + +def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)), + (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>; + +def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)), + (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>; + +def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)), + (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>; + +def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)), + (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>; diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td index 7d3f9d92cbd4..4275230ba717 100644 --- a/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -13,3495 +13,1273 @@ // March 4, 2008 //===----------------------------------------------------------------------===// -// -// ALU 32 types. 
-// +class T_I_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID imm:$Is), + (MI imm:$Is)>; -class qi_ALU32_sisi<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class qi_ALU32_sis10<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class qi_ALU32_sis8<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class qi_ALU32_siu8<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class qi_ALU32_siu9<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_ALU32_qisisi<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_ALU32_qis8si<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, - IntRegs:$src3))]>; - -class si_ALU32_qisis8<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - s8Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - imm:$src3))]>; - -class si_ALU32_qis8s8<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>; - -class si_ALU32_sisi<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU32_sisi_sat<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU32_sisi_rnd<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU32_sis16<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_ALU32_sis10<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, 
s10Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_ALU32_s10si<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")), - [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>; - -class si_lo_ALU32_siu16<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2), - !strconcat("$dst.l = ", !strconcat(opc , "#$src2")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_hi_ALU32_siu16<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2), - !strconcat("$dst.h = ", !strconcat(opc , "#$src2")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_ALU32_s16<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1), - !strconcat("$dst = ", !strconcat(opc , "#$src1")), - [(set IntRegs:$dst, (IntID imm:$src1))]>; - -class di_ALU32_s8<string opc, Intrinsic IntID> - : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1), - !strconcat("$dst = ", !strconcat(opc , "#$src1")), - [(set DoubleRegs:$dst, (IntID imm:$src1))]>; - -class di_ALU64_di<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "$src")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; - -class si_ALU32_si<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src)")), - [(set IntRegs:$dst, (IntID IntRegs:$src))]>; - -class si_ALU32_si_tfr<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "$src")), - [(set IntRegs:$dst, (IntID IntRegs:$src))]>; +class T_R_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs), + (MI I32:$Rs)>; -// -// ALU 64 types. 
-// +class T_P_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs), + (MI DoubleRegs:$Rs)>; + +class T_II_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2> + : Pat<(IntID Imm1:$Is, Imm2:$It), + (MI Imm1:$Is, Imm2:$It)>; + +class T_RI_pat <InstHexagon MI, Intrinsic IntID, PatLeaf ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID I32:$Rs, ImmPred:$It), + (MI I32:$Rs, ImmPred:$It)>; + +class T_IR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID ImmPred:$Is, I32:$Rt), + (MI ImmPred:$Is, I32:$Rt)>; + +class T_PI_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID I64:$Rs, imm:$It), + (MI DoubleRegs:$Rs, imm:$It)>; + +class T_RP_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID I32:$Rs, I64:$Rt), + (MI I32:$Rs, DoubleRegs:$Rt)>; + +class T_RR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, I32:$Rt), + (MI I32:$Rs, I32:$Rt)>; + +class T_PP_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt)>; -class si_ALU64_si_sat<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src))]>; - -class si_ALU64_didi<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class di_ALU64_sidi<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>; - -class di_ALU64_didi<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_ALU64_qididi<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2, - DoubleRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2, - DoubleRegs:$src3))]>; - -class di_ALU64_sisi<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_ALU64_didi_sat<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_ALU64_didi_rnd<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_ALU64_didi_crnd<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), 
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class qi_ALU64_didi<string opc, Intrinsic IntID> - : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class si_ALU64_sisi<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):sat:<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; 
- -class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.H):sat:<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.L):sat:<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.L):sat:<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_lh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_ll<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_sat<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; +class T_QII_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2> + : Pat <(IntID (i32 PredRegs:$Ps), Imm1:$Is, Imm2:$It), + (MI PredRegs:$Ps, Imm1:$Is, Imm2:$It)>; -// -// SInst classes. 
-// +class T_QRI_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred> + : Pat <(IntID (i32 PredRegs:$Ps), I32:$Rs, ImmPred:$Is), + (MI PredRegs:$Ps, I32:$Rs, ImmPred:$Is)>; -class qi_SInst_qi<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src)")), - [(set PredRegs:$dst, (IntID IntRegs:$src))]>; - -class qi_SInst_qi_pxfer<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "$src")), - [(set PredRegs:$dst, (IntID IntRegs:$src))]>; - -class qi_SInst_qiqi<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class qi_SInst_qiqi_neg<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, !$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_SInst_di<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; - -class di_SInst_di_sat<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; - -class si_SInst_di<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>; - -class si_SInst_di_sat<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src):sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>; - -class di_SInst_disi<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; - -class di_SInst_didi<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class di_SInst_si<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; - -class si_SInst_sisiu3<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - imm:$src3))]>; - -class si_SInst_diu5<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - -class si_SInst_disi<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; - -class si_SInst_sidi<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2), 
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>; - -class di_SInst_disisi<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -class di_SInst_sisi<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class qi_SInst_siu5<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class qi_SInst_siu6<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class qi_SInst_sisi<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_SInst_si<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src)")), - [(set IntRegs:$dst, (IntID IntRegs:$src))]>; - -class si_SInst_si_sat<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src))]>; - -class di_SInst_qi<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>; - -class si_SInst_qi<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "$src")), - [(set IntRegs:$dst, (IntID IntRegs:$src))]>; - -class si_SInst_qiqi<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class qi_SInst_si<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "$src")), - [(set PredRegs:$dst, (IntID IntRegs:$src))]>; - -class si_SInst_sisi<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_SInst_diu6<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - -class si_SInst_siu5<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_SInst_siu5_rnd<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , 
"($src1, #$src2):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_SInst_siu5u5<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>; - -class si_SInst_sisisi_acc<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisisi_nac<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_SInst_didisi_acc<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_SInst_didisi_nac<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, IntRegs:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisiu5u5<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u5Imm:$src2, u5Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, #$src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2, imm:$src3))], - "$dst2 = $dst">; - -class si_SInst_sisidi<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_SInst_didiu6u6<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - u6Imm:$src2, u6Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, #$src2, #$src3)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - imm:$src2, imm:$src3))], - "$dst2 = $dst">; - -class di_SInst_dididi<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_SInst_diu6u6<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2, - u6Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2, - imm:$src3))]>; - -class di_SInst_didiqi<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2, - IntRegs:$src3))]>; - -class 
di_SInst_didiu3<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, - u3Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2, - imm:$src3))]>; - -class di_SInst_didisi_or<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2), - !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_SInst_didisi_and<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2), - !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_SInst_didiu6_and<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - u6Imm:$src2), - !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class di_SInst_didiu6_or<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - u6Imm:$src2), - !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class di_SInst_didiu6_xor<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - u6Imm:$src2), - !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisisi_and<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisisi_or<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - - -class si_SInst_sisiu5_and<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u5Imm:$src2), - !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisiu5_or<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u5Imm:$src2), - !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisiu5_xor<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u5Imm:$src2), - !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisiu5_acc<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u5Imm:$src2), - 
!strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisiu5_nac<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u5Imm:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class di_SInst_didiu6_acc<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - u5Imm:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, imm:$src2))], - "$dst2 = $dst">; - -class di_SInst_didiu6_nac<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - u5Imm:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; +class T_QIR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred> + : Pat <(IntID (i32 PredRegs:$Ps), ImmPred:$Is, I32:$Rs), + (MI PredRegs:$Ps, ImmPred:$Is, I32:$Rs)>; +class T_RRI_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, I32:$Rt, imm:$Iu), + (MI I32:$Rs, I32:$Rt, imm:$Iu)>; -// -// MInst classes. -// +class T_RII_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, imm:$It, imm:$Iu), + (MI I32:$Rs, imm:$It, imm:$Iu)>; -class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):<<1:rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.L):<<1:rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.L):rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.H):<<1:rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.H):rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.L):<<1:rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, 
IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.L):rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_disisi_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2*):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2*):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1, $src2*):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_s8s8<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>; - -class si_MInst_sis9<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set 
IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_MInst_sisi<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_hh<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_lh<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_hl<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_ll<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - - -class si_MInst_sisi_hh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_lh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_hl<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , 
"($src1.H, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_ll<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_up<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_didi<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_MInst_didi_conj<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2*):<<1:sat")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2):<<1:rnd:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_MInst_didi_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2):rnd:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class si_SInst_sisi_sat<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_SInst_didi_sat<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class si_SInst_disi_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; - -class 
si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2.L):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2.H):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2*):rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2*):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2):rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisisi_xacc<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - IntRegs:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst += ", !strconcat(opc , "($src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - IntRegs:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst -= ", !strconcat(opc , "($src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - IntRegs:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisis8_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - s8Imm:$src3), - !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - imm:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisis8_nac<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - s8Imm:$src3), - !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - imm:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisiu4u5<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u4Imm:$src2, u5Imm:$src3), - !strconcat("$dst = 
", !strconcat(opc , - "($src1, #$src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2, imm:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisiu8_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - u8Imm:$src3), - !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - imm:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisiu8_nac<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - u8Imm:$src3), - !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - imm:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.L, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.L, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.H, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.H, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.H, $src2.H):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, 
$src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.H):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.H, $src2.L):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, 
$src2.L):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.L, $src2.H):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.H):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.L, $src2.L):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.L, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.H, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.L, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.H, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set 
IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.L):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_ALU32_sisi<string opc, Intrinsic IntID> - : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class 
di_MInst_sisi_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*):sat")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_didi_s1_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, 
IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):<<1:rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , - "($src1.H, $src2.H):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.H, $src2.L):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.H, $src2.L):<<1:rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.H, $src2.L):rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.H, $src2.L):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.H):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.H):rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.H):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.H):<<1:rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, 
$src2.L):rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.L):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.L):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.L):<<1:rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, - DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1, $src2):rnd:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_dididi_acc_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1, $src2):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - - -class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1, $src2):<<1:rnd:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_dididi_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set DoubleRegs:$dst, (IntID 
DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1.H, $src2.H):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1.H, $src2.L):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1.L, $src2.H):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1.L, $src2.L):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , 
"($src1.L, $src2.L)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", - !strconcat(opc , "($src1.H, $src2.H):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", - !strconcat(opc , "($src1.H, $src2.L):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", - !strconcat(opc , "($src1.L, $src2.H):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", - !strconcat(opc , "($src1.L, $src2.L):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disi_s1_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1, $src2):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_didi<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - - -class T_RI_pat <InstHexagon MI, Intrinsic IntID> - : Pat<(IntID (i32 IntRegs:$Rs), imm:$It), - (MI IntRegs:$Rs, imm:$It)>; +class T_IRI_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID imm:$It, I32:$Rs, imm:$Iu), + (MI imm:$It, I32:$Rs, imm:$Iu)>; -// -// LDInst classes. 
-//
-// LDInst classes.
-//
-let mayLoad = 1, hasSideEffects = 0 in
-class di_LDInstPI_diu4<string opc, Intrinsic IntID>
-  : LDInstPI<(outs IntRegs:$dst, DoubleRegs:$dst2),
-             (ins IntRegs:$src1, IntRegs:$src2, CtrRegs:$src3, s4Imm:$offset),
-             "$dst2 = memd($src1++#$offset:circ($src3))",
-             [],
-             "$src1 = $dst">;
+class T_IRR_pat <InstHexagon MI, Intrinsic IntID>
+  : Pat <(IntID imm:$Is, I32:$Rs, I32:$Rt),
+         (MI imm:$Is, I32:$Rs, I32:$Rt)>;
-/********************************************************************
-* ALU32/ALU *
-*********************************************************************/
+class T_RIR_pat <InstHexagon MI, Intrinsic IntID>
+  : Pat <(IntID I32:$Rs, imm:$Is, I32:$Rt),
+         (MI I32:$Rs, imm:$Is, I32:$Rt)>;
-// ALU32 / ALU / Add.
-def HEXAGON_A2_add:
-  si_ALU32_sisi <"add", int_hexagon_A2_add>;
-def HEXAGON_A2_addi:
-  si_ALU32_sis16 <"add", int_hexagon_A2_addi>;
-
-// ALU32 / ALU / Logical operations.
-def HEXAGON_A2_and:
-  si_ALU32_sisi <"and", int_hexagon_A2_and>;
-def HEXAGON_A2_andir:
-  si_ALU32_sis10 <"and", int_hexagon_A2_andir>;
-def HEXAGON_A2_not:
-  si_ALU32_si <"not", int_hexagon_A2_not>;
-def HEXAGON_A2_or:
-  si_ALU32_sisi <"or", int_hexagon_A2_or>;
-def HEXAGON_A2_orir:
-  si_ALU32_sis10 <"or", int_hexagon_A2_orir>;
-def HEXAGON_A2_xor:
-  si_ALU32_sisi <"xor", int_hexagon_A2_xor>;
-
-// ALU32 / ALU / Negate.
-def HEXAGON_A2_neg:
-  si_ALU32_si <"neg", int_hexagon_A2_neg>;
-
-// ALU32 / ALU / Subtract.
-def HEXAGON_A2_sub:
-  si_ALU32_sisi <"sub", int_hexagon_A2_sub>;
-def HEXAGON_A2_subri:
-  si_ALU32_s10si <"sub", int_hexagon_A2_subri>;
-
-// ALU32 / ALU / Transfer Immediate.
-def HEXAGON_A2_tfril:
-  si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>;
-def HEXAGON_A2_tfrih:
-  si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>;
-def HEXAGON_A2_tfrsi:
-  si_ALU32_s16 <"", int_hexagon_A2_tfrsi>;
-def HEXAGON_A2_tfrpi:
-  di_ALU32_s8 <"", int_hexagon_A2_tfrpi>;
-
-// ALU32 / ALU / Transfer Register.
-def HEXAGON_A2_tfr:
-  si_ALU32_si_tfr <"", int_hexagon_A2_tfr>;
+class T_RRR_pat <InstHexagon MI, Intrinsic IntID>
+  : Pat <(IntID I32:$Rs, I32:$Rt, I32:$Ru),
+         (MI I32:$Rs, I32:$Rt, I32:$Ru)>;
-/********************************************************************
-* ALU32/PERM *
-*********************************************************************/
+class T_PPI_pat <InstHexagon MI, Intrinsic IntID>
+  : Pat <(IntID I64:$Rs, I64:$Rt, imm:$Iu),
+         (MI DoubleRegs:$Rs, DoubleRegs:$Rt, imm:$Iu)>;
-// ALU32 / PERM / Combine.
-def HEXAGON_A2_combinew:
-  di_ALU32_sisi <"combine", int_hexagon_A2_combinew>;
-def HEXAGON_A2_combine_hh:
-  si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>;
-def HEXAGON_A2_combine_lh:
-  si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>;
-def HEXAGON_A2_combine_hl:
-  si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>;
-def HEXAGON_A2_combine_ll:
-  si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>;
-def HEXAGON_A2_combineii:
-  di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>;
-
-// ALU32 / PERM / Mux.
-def HEXAGON_C2_mux:
-  si_ALU32_qisisi <"mux", int_hexagon_C2_mux>;
-def HEXAGON_C2_muxri:
-  si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>;
-def HEXAGON_C2_muxir:
-  si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>;
-def HEXAGON_C2_muxii:
-  si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>;
-
-// ALU32 / PERM / Shift halfword.
-def HEXAGON_A2_aslh:
-  si_ALU32_si <"aslh", int_hexagon_A2_aslh>;
-def HEXAGON_A2_asrh:
-  si_ALU32_si <"asrh", int_hexagon_A2_asrh>;
-def SI_to_SXTHI_asrh:
-  si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>;
-
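// Editor's note (illustrative sketch, not part of this patch): each removed
// wrapper def above (e.g. HEXAGON_A2_add) is superseded by one pattern that
// maps the intrinsic onto the real instruction, in the T_RR_pat style this
// file adopts; a representative replacement would look like:
//   def : T_RR_pat <A2_add, int_hexagon_A2_add>;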
-// ALU32 / PERM / Sign/zero extend.
-def HEXAGON_A2_sxth:
-  si_ALU32_si <"sxth", int_hexagon_A2_sxth>;
-def HEXAGON_A2_sxtb:
-  si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>;
-def HEXAGON_A2_zxth:
-  si_ALU32_si <"zxth", int_hexagon_A2_zxth>;
-def HEXAGON_A2_zxtb:
-  si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>;
+class T_PII_pat <InstHexagon MI, Intrinsic IntID>
+  : Pat <(IntID I64:$Rs, imm:$It, imm:$Iu),
+         (MI DoubleRegs:$Rs, imm:$It, imm:$Iu)>;
-/********************************************************************
-* ALU32/PRED *
-*********************************************************************/
+class T_PPP_pat <InstHexagon MI, Intrinsic IntID>
+  : Pat <(IntID I64:$Rs, I64:$Rt, I64:$Ru),
+         (MI DoubleRegs:$Rs, DoubleRegs:$Rt, DoubleRegs:$Ru)>;
-// ALU32 / PRED / Compare.
-def HEXAGON_C2_cmpeq:
-  qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>;
-def HEXAGON_C2_cmpeqi:
-  qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>;
-def HEXAGON_C2_cmpgei:
-  qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>;
-def HEXAGON_C2_cmpgeui:
-  qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>;
-def HEXAGON_C2_cmpgt:
-  qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>;
-def HEXAGON_C2_cmpgti:
-  qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>;
-def HEXAGON_C2_cmpgtu:
-  qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>;
-def HEXAGON_C2_cmpgtui:
-  qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>;
-def HEXAGON_C2_cmplt:
-  qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>;
-def HEXAGON_C2_cmpltu:
-  qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>;
+class T_PPR_pat <InstHexagon MI, Intrinsic IntID>
+  : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Ru),
+         (MI DoubleRegs:$Rs, DoubleRegs:$Rt, I32:$Ru)>;
-/********************************************************************
-* ALU32/VH *
-*********************************************************************/
+class T_PRR_pat <InstHexagon MI, Intrinsic IntID>
+  : Pat <(IntID I64:$Rs, I32:$Rt, I32:$Ru),
+         (MI DoubleRegs:$Rs, I32:$Rt, I32:$Ru)>;
-// ALU32 / VH / Vector add halfwords.
-// Rd32=vadd[u]h(Rs32,Rt32)[:sat]
-def HEXAGON_A2_svaddh:
-  si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>;
-def HEXAGON_A2_svaddhs:
-  si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>;
-def HEXAGON_A2_svadduhs:
-  si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>;
-
-// ALU32 / VH / Vector average halfwords.
-def HEXAGON_A2_svavgh:
-  si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>;
-def HEXAGON_A2_svavghs:
-  si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>;
-def HEXAGON_A2_svnavgh:
-  si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>;
-
-// ALU32 / VH / Vector subtract halfwords.
-def HEXAGON_A2_svsubh:
-  si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>;
-def HEXAGON_A2_svsubhs:
-  si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>;
-def HEXAGON_A2_svsubuhs:
-  si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>;
+class T_PPQ_pat <InstHexagon MI, Intrinsic IntID>
+  : Pat <(IntID I64:$Rs, I64:$Rt, (i32 PredRegs:$Ru)),
+         (MI DoubleRegs:$Rs, DoubleRegs:$Rt, PredRegs:$Ru)>;
-/********************************************************************
-* ALU64/ALU *
-*********************************************************************/
+class T_PR_pat <InstHexagon MI, Intrinsic IntID>
+  : Pat <(IntID I64:$Rs, I32:$Rt),
+         (MI DoubleRegs:$Rs, I32:$Rt)>;
-// ALU64 / ALU / Add.
-def HEXAGON_A2_addp:
-  di_ALU64_didi <"add", int_hexagon_A2_addp>;
-def HEXAGON_A2_addsat:
-  si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>;
-
-// ALU64 / ALU / Add halfword.
-// Even though the definition says hl, it should be lh - -//so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits. -def HEXAGON_A2_addh_l16_hl: - si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>; -def HEXAGON_A2_addh_l16_ll: - si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>; - -def HEXAGON_A2_addh_l16_sat_hl: - si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>; -def HEXAGON_A2_addh_l16_sat_ll: - si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>; - -def HEXAGON_A2_addh_h16_hh: - si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>; -def HEXAGON_A2_addh_h16_hl: - si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>; -def HEXAGON_A2_addh_h16_lh: - si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>; -def HEXAGON_A2_addh_h16_ll: - si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>; - -def HEXAGON_A2_addh_h16_sat_hh: - si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>; -def HEXAGON_A2_addh_h16_sat_hl: - si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>; -def HEXAGON_A2_addh_h16_sat_lh: - si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>; -def HEXAGON_A2_addh_h16_sat_ll: - si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>; - -// ALU64 / ALU / Compare. -def HEXAGON_C2_cmpeqp: - qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>; -def HEXAGON_C2_cmpgtp: - qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>; -def HEXAGON_C2_cmpgtup: - qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>; - -// ALU64 / ALU / Logical operations. -def HEXAGON_A2_andp: - di_ALU64_didi <"and", int_hexagon_A2_andp>; -def HEXAGON_A2_orp: - di_ALU64_didi <"or", int_hexagon_A2_orp>; -def HEXAGON_A2_xorp: - di_ALU64_didi <"xor", int_hexagon_A2_xorp>; - -// ALU64 / ALU / Maximum. -def HEXAGON_A2_max: - si_ALU64_sisi <"max", int_hexagon_A2_max>; -def HEXAGON_A2_maxu: - si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>; - -// ALU64 / ALU / Minimum. -def HEXAGON_A2_min: - si_ALU64_sisi <"min", int_hexagon_A2_min>; -def HEXAGON_A2_minu: - si_ALU64_sisi <"minu", int_hexagon_A2_minu>; - -// ALU64 / ALU / Subtract. -def HEXAGON_A2_subp: - di_ALU64_didi <"sub", int_hexagon_A2_subp>; -def HEXAGON_A2_subsat: - si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>; - -// ALU64 / ALU / Subtract halfword. -// Even though the definition says hl, it should be lh - -//so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits. 
-def HEXAGON_A2_subh_l16_hl: - si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>; -def HEXAGON_A2_subh_l16_ll: - si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>; - -def HEXAGON_A2_subh_l16_sat_hl: - si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>; -def HEXAGON_A2_subh_l16_sat_ll: - si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>; - -def HEXAGON_A2_subh_h16_hh: - si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>; -def HEXAGON_A2_subh_h16_hl: - si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>; -def HEXAGON_A2_subh_h16_lh: - si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>; -def HEXAGON_A2_subh_h16_ll: - si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>; - -def HEXAGON_A2_subh_h16_sat_hh: - si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>; -def HEXAGON_A2_subh_h16_sat_hl: - si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>; -def HEXAGON_A2_subh_h16_sat_lh: - si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>; -def HEXAGON_A2_subh_h16_sat_ll: - si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>; - -// ALU64 / ALU / Transfer register. -def HEXAGON_A2_tfrp: - di_ALU64_di <"", int_hexagon_A2_tfrp>; +class T_D_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID (F64:$Rs)), + (MI (F64:$Rs))>; -/******************************************************************** -* ALU64/BIT * -*********************************************************************/ +class T_DI_pat <InstHexagon MI, Intrinsic IntID, + PatLeaf ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID F64:$Rs, ImmPred:$It), + (MI F64:$Rs, ImmPred:$It)>; -// ALU64 / BIT / Masked parity. -def HEXAGON_S2_parityp: - si_ALU64_didi <"parity", int_hexagon_S2_parityp>; +class T_F_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F32:$Rs), + (MI F32:$Rs)>; -/******************************************************************** -* ALU64/PERM * -*********************************************************************/ +class T_FI_pat <InstHexagon MI, Intrinsic IntID, + PatLeaf ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID F32:$Rs, ImmPred:$It), + (MI F32:$Rs, ImmPred:$It)>; -// ALU64 / PERM / Vector pack high and low halfwords. -def HEXAGON_S2_packhl: - di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>; +class T_FF_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F32:$Rs, F32:$Rt), + (MI F32:$Rs, F32:$Rt)>; -/******************************************************************** -* ALU64/VB * -*********************************************************************/ +class T_DD_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F64:$Rs, F64:$Rt), + (MI F64:$Rs, F64:$Rt)>; -// ALU64 / VB / Vector add unsigned bytes. -def HEXAGON_A2_vaddub: - di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>; -def HEXAGON_A2_vaddubs: - di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>; - -// ALU64 / VB / Vector average unsigned bytes. -def HEXAGON_A2_vavgub: - di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>; -def HEXAGON_A2_vavgubr: - di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>; - -// ALU64 / VB / Vector compare unsigned bytes. -def HEXAGON_A2_vcmpbeq: - qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>; -def HEXAGON_A2_vcmpbgtu: - qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>; - -// ALU64 / VB / Vector maximum/minimum unsigned bytes. 
-def HEXAGON_A2_vmaxub: - di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>; -def HEXAGON_A2_vminub: - di_ALU64_didi <"vminub", int_hexagon_A2_vminub>; - -// ALU64 / VB / Vector subtract unsigned bytes. -def HEXAGON_A2_vsubub: - di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>; -def HEXAGON_A2_vsububs: - di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>; +class T_FFF_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F32:$Rs, F32:$Rt, F32:$Ru), + (MI F32:$Rs, F32:$Rt, F32:$Ru)>; -// ALU64 / VB / Vector mux. -def HEXAGON_C2_vmux: - di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>; +class T_FFFQ_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID F32:$Rs, F32:$Rt, F32:$Ru, (i32 PredRegs:$Rx)), + (MI F32:$Rs, F32:$Rt, F32:$Ru, PredRegs:$Rx)>; +//===----------------------------------------------------------------------===// +// MPYS / Multiply signed/unsigned halfwords +//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat] +//===----------------------------------------------------------------------===// -/******************************************************************** -* ALU64/VH * -*********************************************************************/ +def : T_RR_pat <M2_mpy_ll_s1, int_hexagon_M2_mpy_ll_s1>; +def : T_RR_pat <M2_mpy_ll_s0, int_hexagon_M2_mpy_ll_s0>; +def : T_RR_pat <M2_mpy_lh_s1, int_hexagon_M2_mpy_lh_s1>; +def : T_RR_pat <M2_mpy_lh_s0, int_hexagon_M2_mpy_lh_s0>; +def : T_RR_pat <M2_mpy_hl_s1, int_hexagon_M2_mpy_hl_s1>; +def : T_RR_pat <M2_mpy_hl_s0, int_hexagon_M2_mpy_hl_s0>; +def : T_RR_pat <M2_mpy_hh_s1, int_hexagon_M2_mpy_hh_s1>; +def : T_RR_pat <M2_mpy_hh_s0, int_hexagon_M2_mpy_hh_s0>; + +def : T_RR_pat <M2_mpyu_ll_s1, int_hexagon_M2_mpyu_ll_s1>; +def : T_RR_pat <M2_mpyu_ll_s0, int_hexagon_M2_mpyu_ll_s0>; +def : T_RR_pat <M2_mpyu_lh_s1, int_hexagon_M2_mpyu_lh_s1>; +def : T_RR_pat <M2_mpyu_lh_s0, int_hexagon_M2_mpyu_lh_s0>; +def : T_RR_pat <M2_mpyu_hl_s1, int_hexagon_M2_mpyu_hl_s1>; +def : T_RR_pat <M2_mpyu_hl_s0, int_hexagon_M2_mpyu_hl_s0>; +def : T_RR_pat <M2_mpyu_hh_s1, int_hexagon_M2_mpyu_hh_s1>; +def : T_RR_pat <M2_mpyu_hh_s0, int_hexagon_M2_mpyu_hh_s0>; + +def : T_RR_pat <M2_mpy_sat_ll_s1, int_hexagon_M2_mpy_sat_ll_s1>; +def : T_RR_pat <M2_mpy_sat_ll_s0, int_hexagon_M2_mpy_sat_ll_s0>; +def : T_RR_pat <M2_mpy_sat_lh_s1, int_hexagon_M2_mpy_sat_lh_s1>; +def : T_RR_pat <M2_mpy_sat_lh_s0, int_hexagon_M2_mpy_sat_lh_s0>; +def : T_RR_pat <M2_mpy_sat_hl_s1, int_hexagon_M2_mpy_sat_hl_s1>; +def : T_RR_pat <M2_mpy_sat_hl_s0, int_hexagon_M2_mpy_sat_hl_s0>; +def : T_RR_pat <M2_mpy_sat_hh_s1, int_hexagon_M2_mpy_sat_hh_s1>; +def : T_RR_pat <M2_mpy_sat_hh_s0, int_hexagon_M2_mpy_sat_hh_s0>; + +def : T_RR_pat <M2_mpy_rnd_ll_s1, int_hexagon_M2_mpy_rnd_ll_s1>; +def : T_RR_pat <M2_mpy_rnd_ll_s0, int_hexagon_M2_mpy_rnd_ll_s0>; +def : T_RR_pat <M2_mpy_rnd_lh_s1, int_hexagon_M2_mpy_rnd_lh_s1>; +def : T_RR_pat <M2_mpy_rnd_lh_s0, int_hexagon_M2_mpy_rnd_lh_s0>; +def : T_RR_pat <M2_mpy_rnd_hl_s1, int_hexagon_M2_mpy_rnd_hl_s1>; +def : T_RR_pat <M2_mpy_rnd_hl_s0, int_hexagon_M2_mpy_rnd_hl_s0>; +def : T_RR_pat <M2_mpy_rnd_hh_s1, int_hexagon_M2_mpy_rnd_hh_s1>; +def : T_RR_pat <M2_mpy_rnd_hh_s0, int_hexagon_M2_mpy_rnd_hh_s0>; + +def : T_RR_pat <M2_mpy_sat_rnd_ll_s1, int_hexagon_M2_mpy_sat_rnd_ll_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_ll_s0, int_hexagon_M2_mpy_sat_rnd_ll_s0>; +def : T_RR_pat <M2_mpy_sat_rnd_lh_s1, int_hexagon_M2_mpy_sat_rnd_lh_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_lh_s0, int_hexagon_M2_mpy_sat_rnd_lh_s0>; +def : T_RR_pat <M2_mpy_sat_rnd_hl_s1, int_hexagon_M2_mpy_sat_rnd_hl_s1>; +def :
T_RR_pat <M2_mpy_sat_rnd_hl_s0, int_hexagon_M2_mpy_sat_rnd_hl_s0>; +def : T_RR_pat <M2_mpy_sat_rnd_hh_s1, int_hexagon_M2_mpy_sat_rnd_hh_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_hh_s0, int_hexagon_M2_mpy_sat_rnd_hh_s0>; -// ALU64 / VH / Vector add halfwords. -// Rdd64=vadd[u]h(Rss64,Rtt64:sat] -def HEXAGON_A2_vaddh: - di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>; -def HEXAGON_A2_vaddhs: - di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>; -def HEXAGON_A2_vadduhs: - di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>; - -// ALU64 / VH / Vector average halfwords. -// Rdd64=v[n]avg[u]h(Rss64,Rtt64:rnd/:crnd][:sat] -def HEXAGON_A2_vavgh: - di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>; -def HEXAGON_A2_vavghcr: - di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>; -def HEXAGON_A2_vavghr: - di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>; -def HEXAGON_A2_vavguh: - di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>; -def HEXAGON_A2_vavguhr: - di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>; -def HEXAGON_A2_vnavgh: - di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>; -def HEXAGON_A2_vnavghcr: - di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>; -def HEXAGON_A2_vnavghr: - di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>; - -// ALU64 / VH / Vector compare halfwords. -def HEXAGON_A2_vcmpheq: - qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>; -def HEXAGON_A2_vcmphgt: - qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>; -def HEXAGON_A2_vcmphgtu: - qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>; - -// ALU64 / VH / Vector maximum halfwords. -def HEXAGON_A2_vmaxh: - di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>; -def HEXAGON_A2_vmaxuh: - di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>; - -// ALU64 / VH / Vector minimum halfwords. -def HEXAGON_A2_vminh: - di_ALU64_didi <"vminh", int_hexagon_A2_vminh>; -def HEXAGON_A2_vminuh: - di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>; - -// ALU64 / VH / Vector subtract halfwords. -def HEXAGON_A2_vsubh: - di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>; -def HEXAGON_A2_vsubhs: - di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>; -def HEXAGON_A2_vsubuhs: - di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>; +//===----------------------------------------------------------------------===// +// MPYS / Multiply signed/unsigned halfwords and add/subtract the +// result from the accumulator.
+//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// -/******************************************************************** -* ALU64/VW * -*********************************************************************/ +def : T_RRR_pat <M2_mpy_acc_ll_s1, int_hexagon_M2_mpy_acc_ll_s1>; +def : T_RRR_pat <M2_mpy_acc_ll_s0, int_hexagon_M2_mpy_acc_ll_s0>; +def : T_RRR_pat <M2_mpy_acc_lh_s1, int_hexagon_M2_mpy_acc_lh_s1>; +def : T_RRR_pat <M2_mpy_acc_lh_s0, int_hexagon_M2_mpy_acc_lh_s0>; +def : T_RRR_pat <M2_mpy_acc_hl_s1, int_hexagon_M2_mpy_acc_hl_s1>; +def : T_RRR_pat <M2_mpy_acc_hl_s0, int_hexagon_M2_mpy_acc_hl_s0>; +def : T_RRR_pat <M2_mpy_acc_hh_s1, int_hexagon_M2_mpy_acc_hh_s1>; +def : T_RRR_pat <M2_mpy_acc_hh_s0, int_hexagon_M2_mpy_acc_hh_s0>; + +def : T_RRR_pat <M2_mpyu_acc_ll_s1, int_hexagon_M2_mpyu_acc_ll_s1>; +def : T_RRR_pat <M2_mpyu_acc_ll_s0, int_hexagon_M2_mpyu_acc_ll_s0>; +def : T_RRR_pat <M2_mpyu_acc_lh_s1, int_hexagon_M2_mpyu_acc_lh_s1>; +def : T_RRR_pat <M2_mpyu_acc_lh_s0, int_hexagon_M2_mpyu_acc_lh_s0>; +def : T_RRR_pat <M2_mpyu_acc_hl_s1, int_hexagon_M2_mpyu_acc_hl_s1>; +def : T_RRR_pat <M2_mpyu_acc_hl_s0, int_hexagon_M2_mpyu_acc_hl_s0>; +def : T_RRR_pat <M2_mpyu_acc_hh_s1, int_hexagon_M2_mpyu_acc_hh_s1>; +def : T_RRR_pat <M2_mpyu_acc_hh_s0, int_hexagon_M2_mpyu_acc_hh_s0>; + +def : T_RRR_pat <M2_mpy_nac_ll_s1, int_hexagon_M2_mpy_nac_ll_s1>; +def : T_RRR_pat <M2_mpy_nac_ll_s0, int_hexagon_M2_mpy_nac_ll_s0>; +def : T_RRR_pat <M2_mpy_nac_lh_s1, int_hexagon_M2_mpy_nac_lh_s1>; +def : T_RRR_pat <M2_mpy_nac_lh_s0, int_hexagon_M2_mpy_nac_lh_s0>; +def : T_RRR_pat <M2_mpy_nac_hl_s1, int_hexagon_M2_mpy_nac_hl_s1>; +def : T_RRR_pat <M2_mpy_nac_hl_s0, int_hexagon_M2_mpy_nac_hl_s0>; +def : T_RRR_pat <M2_mpy_nac_hh_s1, int_hexagon_M2_mpy_nac_hh_s1>; +def : T_RRR_pat <M2_mpy_nac_hh_s0, int_hexagon_M2_mpy_nac_hh_s0>; + +def : T_RRR_pat <M2_mpyu_nac_ll_s1, int_hexagon_M2_mpyu_nac_ll_s1>; +def : T_RRR_pat <M2_mpyu_nac_ll_s0, int_hexagon_M2_mpyu_nac_ll_s0>; +def : T_RRR_pat <M2_mpyu_nac_lh_s1, int_hexagon_M2_mpyu_nac_lh_s1>; +def : T_RRR_pat <M2_mpyu_nac_lh_s0, int_hexagon_M2_mpyu_nac_lh_s0>; +def : T_RRR_pat <M2_mpyu_nac_hl_s1, int_hexagon_M2_mpyu_nac_hl_s1>; +def : T_RRR_pat <M2_mpyu_nac_hl_s0, int_hexagon_M2_mpyu_nac_hl_s0>; +def : T_RRR_pat <M2_mpyu_nac_hh_s1, int_hexagon_M2_mpyu_nac_hh_s1>; +def : T_RRR_pat <M2_mpyu_nac_hh_s0, int_hexagon_M2_mpyu_nac_hh_s0>; + +def : T_RRR_pat <M2_mpy_acc_sat_ll_s1, int_hexagon_M2_mpy_acc_sat_ll_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_ll_s0, int_hexagon_M2_mpy_acc_sat_ll_s0>; +def : T_RRR_pat <M2_mpy_acc_sat_lh_s1, int_hexagon_M2_mpy_acc_sat_lh_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_lh_s0, int_hexagon_M2_mpy_acc_sat_lh_s0>; +def : T_RRR_pat <M2_mpy_acc_sat_hl_s1, int_hexagon_M2_mpy_acc_sat_hl_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_hl_s0, int_hexagon_M2_mpy_acc_sat_hl_s0>; +def : T_RRR_pat <M2_mpy_acc_sat_hh_s1, int_hexagon_M2_mpy_acc_sat_hh_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_hh_s0, int_hexagon_M2_mpy_acc_sat_hh_s0>; + +def : T_RRR_pat <M2_mpy_nac_sat_ll_s1, int_hexagon_M2_mpy_nac_sat_ll_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_ll_s0, int_hexagon_M2_mpy_nac_sat_ll_s0>; +def : T_RRR_pat <M2_mpy_nac_sat_lh_s1, int_hexagon_M2_mpy_nac_sat_lh_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_lh_s0, int_hexagon_M2_mpy_nac_sat_lh_s0>; +def : T_RRR_pat <M2_mpy_nac_sat_hl_s1, int_hexagon_M2_mpy_nac_sat_hl_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_hl_s0, int_hexagon_M2_mpy_nac_sat_hl_s0>; +def : T_RRR_pat 
<M2_mpy_nac_sat_hh_s1, int_hexagon_M2_mpy_nac_sat_hh_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_hh_s0, int_hexagon_M2_mpy_nac_sat_hh_s0>; -// ALU64 / VW / Vector add words. -// Rdd32=vaddw(Rss32,Rtt32)[:sat] -def HEXAGON_A2_vaddw: - di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>; -def HEXAGON_A2_vaddws: - di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>; - -// ALU64 / VW / Vector average words. -def HEXAGON_A2_vavguw: - di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>; -def HEXAGON_A2_vavguwr: - di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>; -def HEXAGON_A2_vavgw: - di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>; -def HEXAGON_A2_vavgwcr: - di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>; -def HEXAGON_A2_vavgwr: - di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>; -def HEXAGON_A2_vnavgw: - di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>; -def HEXAGON_A2_vnavgwcr: - di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>; -def HEXAGON_A2_vnavgwr: - di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>; - -// ALU64 / VW / Vector compare words. -def HEXAGON_A2_vcmpweq: - qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>; -def HEXAGON_A2_vcmpwgt: - qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>; -def HEXAGON_A2_vcmpwgtu: - qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>; - -// ALU64 / VW / Vector maximum words. -def HEXAGON_A2_vmaxw: - di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>; -def HEXAGON_A2_vmaxuw: - di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>; - -// ALU64 / VW / Vector minimum words. -def HEXAGON_A2_vminw: - di_ALU64_didi <"vminw", int_hexagon_A2_vminw>; -def HEXAGON_A2_vminuw: - di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>; - -// ALU64 / VW / Vector subtract words. -def HEXAGON_A2_vsubw: - di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>; -def HEXAGON_A2_vsubws: - di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>; +//===----------------------------------------------------------------------===// +// Multiply signed/unsigned halfwords with and without saturation and rounding +// into a 64-bit destination register.
+//===----------------------------------------------------------------------===// -/******************************************************************** -* CR * -*********************************************************************/ +def : T_RR_pat <M2_mpyd_hh_s0, int_hexagon_M2_mpyd_hh_s0>; +def : T_RR_pat <M2_mpyd_hl_s0, int_hexagon_M2_mpyd_hl_s0>; +def : T_RR_pat <M2_mpyd_lh_s0, int_hexagon_M2_mpyd_lh_s0>; +def : T_RR_pat <M2_mpyd_ll_s0, int_hexagon_M2_mpyd_ll_s0>; +def : T_RR_pat <M2_mpyd_hh_s1, int_hexagon_M2_mpyd_hh_s1>; +def : T_RR_pat <M2_mpyd_hl_s1, int_hexagon_M2_mpyd_hl_s1>; +def : T_RR_pat <M2_mpyd_lh_s1, int_hexagon_M2_mpyd_lh_s1>; +def : T_RR_pat <M2_mpyd_ll_s1, int_hexagon_M2_mpyd_ll_s1>; + +def : T_RR_pat <M2_mpyd_rnd_hh_s0, int_hexagon_M2_mpyd_rnd_hh_s0>; +def : T_RR_pat <M2_mpyd_rnd_hl_s0, int_hexagon_M2_mpyd_rnd_hl_s0>; +def : T_RR_pat <M2_mpyd_rnd_lh_s0, int_hexagon_M2_mpyd_rnd_lh_s0>; +def : T_RR_pat <M2_mpyd_rnd_ll_s0, int_hexagon_M2_mpyd_rnd_ll_s0>; +def : T_RR_pat <M2_mpyd_rnd_hh_s1, int_hexagon_M2_mpyd_rnd_hh_s1>; +def : T_RR_pat <M2_mpyd_rnd_hl_s1, int_hexagon_M2_mpyd_rnd_hl_s1>; +def : T_RR_pat <M2_mpyd_rnd_lh_s1, int_hexagon_M2_mpyd_rnd_lh_s1>; +def : T_RR_pat <M2_mpyd_rnd_ll_s1, int_hexagon_M2_mpyd_rnd_ll_s1>; + +def : T_RR_pat <M2_mpyud_hh_s0, int_hexagon_M2_mpyud_hh_s0>; +def : T_RR_pat <M2_mpyud_hl_s0, int_hexagon_M2_mpyud_hl_s0>; +def : T_RR_pat <M2_mpyud_lh_s0, int_hexagon_M2_mpyud_lh_s0>; +def : T_RR_pat <M2_mpyud_ll_s0, int_hexagon_M2_mpyud_ll_s0>; +def : T_RR_pat <M2_mpyud_hh_s1, int_hexagon_M2_mpyud_hh_s1>; +def : T_RR_pat <M2_mpyud_hl_s1, int_hexagon_M2_mpyud_hl_s1>; +def : T_RR_pat <M2_mpyud_lh_s1, int_hexagon_M2_mpyud_lh_s1>; +def : T_RR_pat <M2_mpyud_ll_s1, int_hexagon_M2_mpyud_ll_s1>; + +//===----------------------------------------------------------------------===// +// MPYS / Multiply signed/unsigned halfwords and add/subtract the +// result from the 64-bit destination register. +//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// -// CR / Logical reductions on predicates. -def HEXAGON_C2_all8: - qi_SInst_qi <"all8", int_hexagon_C2_all8>; -def HEXAGON_C2_any8: - qi_SInst_qi <"any8", int_hexagon_C2_any8>; - -// CR / Logical operations on predicates.
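Note that T_RR_pat and its relatives spell out only the source operands; the result register class is taken from the instruction definition itself, which is why the same class serves both the 32-bit mpy patterns earlier in this section and the 64-bit-destination mpyd patterns above. Both instantiations below appear in this patch; only the trailing comments are added here as illustration:

def : T_RR_pat <M2_mpy_ll_s0,  int_hexagon_M2_mpy_ll_s0>;  // Rd = mpy(Rs.l,Rt.l), i32 result
def : T_RR_pat <M2_mpyd_ll_s0, int_hexagon_M2_mpyd_ll_s0>; // Rdd = mpy(Rs.l,Rt.l), i64 result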
-def HEXAGON_C2_pxfer_map: - qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>; -def HEXAGON_C2_and: - qi_SInst_qiqi <"and", int_hexagon_C2_and>; -def HEXAGON_C2_andn: - qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>; -def HEXAGON_C2_not: - qi_SInst_qi <"not", int_hexagon_C2_not>; -def HEXAGON_C2_or: - qi_SInst_qiqi <"or", int_hexagon_C2_or>; -def HEXAGON_C2_orn: - qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>; -def HEXAGON_C2_xor: - qi_SInst_qiqi <"xor", int_hexagon_C2_xor>; +def : T_PRR_pat <M2_mpyd_acc_hh_s0, int_hexagon_M2_mpyd_acc_hh_s0>; +def : T_PRR_pat <M2_mpyd_acc_hl_s0, int_hexagon_M2_mpyd_acc_hl_s0>; +def : T_PRR_pat <M2_mpyd_acc_lh_s0, int_hexagon_M2_mpyd_acc_lh_s0>; +def : T_PRR_pat <M2_mpyd_acc_ll_s0, int_hexagon_M2_mpyd_acc_ll_s0>; + +def : T_PRR_pat <M2_mpyd_acc_hh_s1, int_hexagon_M2_mpyd_acc_hh_s1>; +def : T_PRR_pat <M2_mpyd_acc_hl_s1, int_hexagon_M2_mpyd_acc_hl_s1>; +def : T_PRR_pat <M2_mpyd_acc_lh_s1, int_hexagon_M2_mpyd_acc_lh_s1>; +def : T_PRR_pat <M2_mpyd_acc_ll_s1, int_hexagon_M2_mpyd_acc_ll_s1>; + +def : T_PRR_pat <M2_mpyd_nac_hh_s0, int_hexagon_M2_mpyd_nac_hh_s0>; +def : T_PRR_pat <M2_mpyd_nac_hl_s0, int_hexagon_M2_mpyd_nac_hl_s0>; +def : T_PRR_pat <M2_mpyd_nac_lh_s0, int_hexagon_M2_mpyd_nac_lh_s0>; +def : T_PRR_pat <M2_mpyd_nac_ll_s0, int_hexagon_M2_mpyd_nac_ll_s0>; + +def : T_PRR_pat <M2_mpyd_nac_hh_s1, int_hexagon_M2_mpyd_nac_hh_s1>; +def : T_PRR_pat <M2_mpyd_nac_hl_s1, int_hexagon_M2_mpyd_nac_hl_s1>; +def : T_PRR_pat <M2_mpyd_nac_lh_s1, int_hexagon_M2_mpyd_nac_lh_s1>; +def : T_PRR_pat <M2_mpyd_nac_ll_s1, int_hexagon_M2_mpyd_nac_ll_s1>; + +def : T_PRR_pat <M2_mpyud_acc_hh_s0, int_hexagon_M2_mpyud_acc_hh_s0>; +def : T_PRR_pat <M2_mpyud_acc_hl_s0, int_hexagon_M2_mpyud_acc_hl_s0>; +def : T_PRR_pat <M2_mpyud_acc_lh_s0, int_hexagon_M2_mpyud_acc_lh_s0>; +def : T_PRR_pat <M2_mpyud_acc_ll_s0, int_hexagon_M2_mpyud_acc_ll_s0>; + +def : T_PRR_pat <M2_mpyud_acc_hh_s1, int_hexagon_M2_mpyud_acc_hh_s1>; +def : T_PRR_pat <M2_mpyud_acc_hl_s1, int_hexagon_M2_mpyud_acc_hl_s1>; +def : T_PRR_pat <M2_mpyud_acc_lh_s1, int_hexagon_M2_mpyud_acc_lh_s1>; +def : T_PRR_pat <M2_mpyud_acc_ll_s1, int_hexagon_M2_mpyud_acc_ll_s1>; + +def : T_PRR_pat <M2_mpyud_nac_hh_s0, int_hexagon_M2_mpyud_nac_hh_s0>; +def : T_PRR_pat <M2_mpyud_nac_hl_s0, int_hexagon_M2_mpyud_nac_hl_s0>; +def : T_PRR_pat <M2_mpyud_nac_lh_s0, int_hexagon_M2_mpyud_nac_lh_s0>; +def : T_PRR_pat <M2_mpyud_nac_ll_s0, int_hexagon_M2_mpyud_nac_ll_s0>; + +def : T_PRR_pat <M2_mpyud_nac_hh_s1, int_hexagon_M2_mpyud_nac_hh_s1>; +def : T_PRR_pat <M2_mpyud_nac_hl_s1, int_hexagon_M2_mpyud_nac_hl_s1>; +def : T_PRR_pat <M2_mpyud_nac_lh_s1, int_hexagon_M2_mpyud_nac_lh_s1>; +def : T_PRR_pat <M2_mpyud_nac_ll_s1, int_hexagon_M2_mpyud_nac_ll_s1>; + +// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vcmpy_s1_sat_i, int_hexagon_M2_vcmpy_s1_sat_i>; +def : T_PP_pat <M2_vcmpy_s0_sat_i, int_hexagon_M2_vcmpy_s0_sat_i>; + +// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vcmpy_s1_sat_r, int_hexagon_M2_vcmpy_s1_sat_r>; +def : T_PP_pat <M2_vcmpy_s0_sat_r, int_hexagon_M2_vcmpy_s0_sat_r>; + +// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vdmpys_s1, int_hexagon_M2_vdmpys_s1>; +def : T_PP_pat <M2_vdmpys_s0, int_hexagon_M2_vdmpys_s0>; + +// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vmpy2es_s1, int_hexagon_M2_vmpy2es_s1>; +def : T_PP_pat <M2_vmpy2es_s0, int_hexagon_M2_vmpy2es_s0>; + +//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat +def : 
T_PP_pat <M2_mmpyh_s0, int_hexagon_M2_mmpyh_s0>; +def : T_PP_pat <M2_mmpyh_s1, int_hexagon_M2_mmpyh_s1>; +def : T_PP_pat <M2_mmpyh_rs0, int_hexagon_M2_mmpyh_rs0>; +def : T_PP_pat <M2_mmpyh_rs1, int_hexagon_M2_mmpyh_rs1>; + +//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyl_s0, int_hexagon_M2_mmpyl_s0>; +def : T_PP_pat <M2_mmpyl_s1, int_hexagon_M2_mmpyl_s1>; +def : T_PP_pat <M2_mmpyl_rs0, int_hexagon_M2_mmpyl_rs0>; +def : T_PP_pat <M2_mmpyl_rs1, int_hexagon_M2_mmpyl_rs1>; + +//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyuh_s0, int_hexagon_M2_mmpyuh_s0>; +def : T_PP_pat <M2_mmpyuh_s1, int_hexagon_M2_mmpyuh_s1>; +def : T_PP_pat <M2_mmpyuh_rs0, int_hexagon_M2_mmpyuh_rs0>; +def : T_PP_pat <M2_mmpyuh_rs1, int_hexagon_M2_mmpyuh_rs1>; + +//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyul_s0, int_hexagon_M2_mmpyul_s0>; +def : T_PP_pat <M2_mmpyul_s1, int_hexagon_M2_mmpyul_s1>; +def : T_PP_pat <M2_mmpyul_rs0, int_hexagon_M2_mmpyul_rs0>; +def : T_PP_pat <M2_mmpyul_rs1, int_hexagon_M2_mmpyul_rs1>; + +// Vector reduce add unsigned bytes: Rdd32[+]=vrmpybu(Rss32,Rtt32) +def : T_PP_pat <A2_vraddub, int_hexagon_A2_vraddub>; +def : T_PPP_pat <A2_vraddub_acc, int_hexagon_A2_vraddub_acc>; + +// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt) +def : T_PP_pat <A2_vrsadub, int_hexagon_A2_vrsadub>; +def : T_PPP_pat <A2_vrsadub_acc, int_hexagon_A2_vrsadub_acc>; + +// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss) +def : T_PP_pat <M2_vabsdiffh, int_hexagon_M2_vabsdiffh>; + +// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss) +def : T_PP_pat <M2_vabsdiffw, int_hexagon_M2_vabsdiffw>; + +// Vector reduce complex multiply real or imaginary: +// Rdd[+]=vrcmpy[ir](Rss,Rtt[*]) +def : T_PP_pat <M2_vrcmpyi_s0, int_hexagon_M2_vrcmpyi_s0>; +def : T_PP_pat <M2_vrcmpyi_s0c, int_hexagon_M2_vrcmpyi_s0c>; +def : T_PPP_pat <M2_vrcmaci_s0, int_hexagon_M2_vrcmaci_s0>; +def : T_PPP_pat <M2_vrcmaci_s0c, int_hexagon_M2_vrcmaci_s0c>; + +def : T_PP_pat <M2_vrcmpyr_s0, int_hexagon_M2_vrcmpyr_s0>; +def : T_PP_pat <M2_vrcmpyr_s0c, int_hexagon_M2_vrcmpyr_s0c>; +def : T_PPP_pat <M2_vrcmacr_s0, int_hexagon_M2_vrcmacr_s0>; +def : T_PPP_pat <M2_vrcmacr_s0c, int_hexagon_M2_vrcmacr_s0c>; + +// Vector reduce halfwords +// Rdd[+]=vrmpyh(Rss,Rtt) +def : T_PP_pat <M2_vrmpy_s0, int_hexagon_M2_vrmpy_s0>; +def : T_PPP_pat <M2_vrmac_s0, int_hexagon_M2_vrmac_s0>; +//===----------------------------------------------------------------------===// +// Vector Multiply with accumulation +//===----------------------------------------------------------------------===// + +// Vector multiply word by signed half with accumulation +// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PPP_pat <M2_mmacls_s1, int_hexagon_M2_mmacls_s1>; +def : T_PPP_pat <M2_mmacls_s0, int_hexagon_M2_mmacls_s0>; +def : T_PPP_pat <M2_mmacls_rs1, int_hexagon_M2_mmacls_rs1>; +def : T_PPP_pat <M2_mmacls_rs0, int_hexagon_M2_mmacls_rs0>; +def : T_PPP_pat <M2_mmachs_s1, int_hexagon_M2_mmachs_s1>; +def : T_PPP_pat <M2_mmachs_s0, int_hexagon_M2_mmachs_s0>; +def : T_PPP_pat <M2_mmachs_rs1, int_hexagon_M2_mmachs_rs1>; +def : T_PPP_pat <M2_mmachs_rs0, int_hexagon_M2_mmachs_rs0>; + +// Vector multiply word by unsigned half with accumulation +// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PPP_pat <M2_mmaculs_s1, int_hexagon_M2_mmaculs_s1>; +def : T_PPP_pat <M2_mmaculs_s0, int_hexagon_M2_mmaculs_s0>; +def : T_PPP_pat <M2_mmaculs_rs1, int_hexagon_M2_mmaculs_rs1>; +def : T_PPP_pat <M2_mmaculs_rs0,
int_hexagon_M2_mmaculs_rs0>; +def : T_PPP_pat <M2_mmacuhs_s1, int_hexagon_M2_mmacuhs_s1>; +def : T_PPP_pat <M2_mmacuhs_s0, int_hexagon_M2_mmacuhs_s0>; +def : T_PPP_pat <M2_mmacuhs_rs1, int_hexagon_M2_mmacuhs_rs1>; +def : T_PPP_pat <M2_mmacuhs_rs0, int_hexagon_M2_mmacuhs_rs0>; + +// Vector multiply even halfwords with accumulation +// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat] +def : T_PPP_pat <M2_vmac2es, int_hexagon_M2_vmac2es>; +def : T_PPP_pat <M2_vmac2es_s1, int_hexagon_M2_vmac2es_s1>; +def : T_PPP_pat <M2_vmac2es_s0, int_hexagon_M2_vmac2es_s0>; + +// Vector dual multiply with accumulation +// Rxx+=vdmpy(Rss,Rtt)[:sat] +def : T_PPP_pat <M2_vdmacs_s1, int_hexagon_M2_vdmacs_s1>; +def : T_PPP_pat <M2_vdmacs_s0, int_hexagon_M2_vdmacs_s0>; + +// Vector complex multiply real or imaginary with accumulation +// Rxx+=vcmpy[ir](Rss,Rtt):sat +def : T_PPP_pat <M2_vcmac_s0_sat_r, int_hexagon_M2_vcmac_s0_sat_r>; +def : T_PPP_pat <M2_vcmac_s0_sat_i, int_hexagon_M2_vcmac_s0_sat_i>; + +//===----------------------------------------------------------------------===// +// Add/Subtract halfword +// Rd=add(Rt.L,Rs.[HL])[:sat] +// Rd=sub(Rt.L,Rs.[HL])[:sat] +// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16] +// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16] +//===----------------------------------------------------------------------===// + +//Rd=add(Rt.L,Rs.[LH]) +def : T_RR_pat <A2_addh_l16_ll, int_hexagon_A2_addh_l16_ll>; +def : T_RR_pat <A2_addh_l16_hl, int_hexagon_A2_addh_l16_hl>; + +//Rd=add(Rt.L,Rs.[LH]):sat +def : T_RR_pat <A2_addh_l16_sat_ll, int_hexagon_A2_addh_l16_sat_ll>; +def : T_RR_pat <A2_addh_l16_sat_hl, int_hexagon_A2_addh_l16_sat_hl>; + +//Rd=sub(Rt.L,Rs.[LH]) +def : T_RR_pat <A2_subh_l16_ll, int_hexagon_A2_subh_l16_ll>; +def : T_RR_pat <A2_subh_l16_hl, int_hexagon_A2_subh_l16_hl>; + +//Rd=sub(Rt.L,Rs.[LH]):sat +def : T_RR_pat <A2_subh_l16_sat_ll, int_hexagon_A2_subh_l16_sat_ll>; +def : T_RR_pat <A2_subh_l16_sat_hl, int_hexagon_A2_subh_l16_sat_hl>; + +//Rd=add(Rt.[LH],Rs.[LH]):<<16 +def : T_RR_pat <A2_addh_h16_ll, int_hexagon_A2_addh_h16_ll>; +def : T_RR_pat <A2_addh_h16_lh, int_hexagon_A2_addh_h16_lh>; +def : T_RR_pat <A2_addh_h16_hl, int_hexagon_A2_addh_h16_hl>; +def : T_RR_pat <A2_addh_h16_hh, int_hexagon_A2_addh_h16_hh>; + +//Rd=sub(Rt.[LH],Rs.[LH]):<<16 +def : T_RR_pat <A2_subh_h16_ll, int_hexagon_A2_subh_h16_ll>; +def : T_RR_pat <A2_subh_h16_lh, int_hexagon_A2_subh_h16_lh>; +def : T_RR_pat <A2_subh_h16_hl, int_hexagon_A2_subh_h16_hl>; +def : T_RR_pat <A2_subh_h16_hh, int_hexagon_A2_subh_h16_hh>; + +//Rd=add(Rt.[LH],Rs.[LH]):sat:<<16 +def : T_RR_pat <A2_addh_h16_sat_ll, int_hexagon_A2_addh_h16_sat_ll>; +def : T_RR_pat <A2_addh_h16_sat_lh, int_hexagon_A2_addh_h16_sat_lh>; +def : T_RR_pat <A2_addh_h16_sat_hl, int_hexagon_A2_addh_h16_sat_hl>; +def : T_RR_pat <A2_addh_h16_sat_hh, int_hexagon_A2_addh_h16_sat_hh>; + +//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16 +def : T_RR_pat <A2_subh_h16_sat_ll, int_hexagon_A2_subh_h16_sat_ll>; +def : T_RR_pat <A2_subh_h16_sat_lh, int_hexagon_A2_subh_h16_sat_lh>; +def : T_RR_pat <A2_subh_h16_sat_hl, int_hexagon_A2_subh_h16_sat_hl>; +def : T_RR_pat <A2_subh_h16_sat_hh, int_hexagon_A2_subh_h16_sat_hh>; + +// ALU64 / ALU / min max +def : T_RR_pat<A2_max, int_hexagon_A2_max>; +def : T_RR_pat<A2_min, int_hexagon_A2_min>; +def : T_RR_pat<A2_maxu, int_hexagon_A2_maxu>; +def : T_RR_pat<A2_minu, int_hexagon_A2_minu>; + +// Shift and accumulate +def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>; +def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>; +def : T_RRI_pat <S2_asl_i_r_nac, 
int_hexagon_S2_asl_i_r_nac>; +def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>; +def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>; +def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>; + +def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>; +def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>; +def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>; +def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>; +def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>; +def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>; + +def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>; +def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>; +def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>; +def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>; +def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>; +def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>; + +def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>; +def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>; +def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>; +def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>; +def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>; +def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>; + +def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>; +def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>; +def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>; +def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>; +def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>; +def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>; +def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>; +def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>; + +def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>; +def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>; +def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>; +def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>; +def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>; +def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>; +def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>; +def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>; + +def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>; +def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>; +def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>; +def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>; +def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>; +def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>; +def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>; +def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>; + +def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>; +def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>; +def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>; +def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>; +def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>; +def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>; +def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>; 
+def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>;
 /******************************************************************** -* MTYPE/ALU * +* ALU32/ALU * *********************************************************************/ +def : T_RR_pat<A2_add, int_hexagon_A2_add>; +def : T_RI_pat<A2_addi, int_hexagon_A2_addi>; +def : T_RR_pat<A2_sub, int_hexagon_A2_sub>; +def : T_IR_pat<A2_subri, int_hexagon_A2_subri>; +def : T_RR_pat<A2_and, int_hexagon_A2_and>; +def : T_RI_pat<A2_andir, int_hexagon_A2_andir>; +def : T_RR_pat<A2_or, int_hexagon_A2_or>; +def : T_RI_pat<A2_orir, int_hexagon_A2_orir>; +def : T_RR_pat<A2_xor, int_hexagon_A2_xor>; +def : T_RR_pat<A2_combinew, int_hexagon_A2_combinew>; + +// Assembler mapped from Rd32=not(Rs32) to Rd32=sub(#-1,Rs32) +def : Pat <(int_hexagon_A2_not (I32:$Rs)), + (A2_subri -1, IntRegs:$Rs)>; + +// Assembler mapped from Rd32=neg(Rs32) to Rd32=sub(#0,Rs32) +def : Pat <(int_hexagon_A2_neg IntRegs:$Rs), + (A2_subri 0, IntRegs:$Rs)>; + +// Transfer immediate +def : Pat <(int_hexagon_A2_tfril (I32:$Rs), u16_0ImmPred:$Is), + (A2_tfril IntRegs:$Rs, u16_0ImmPred:$Is)>; +def : Pat <(int_hexagon_A2_tfrih (I32:$Rs), u16_0ImmPred:$Is), + (A2_tfrih IntRegs:$Rs, u16_0ImmPred:$Is)>; + +// Transfer Register/immediate. +def : T_R_pat <A2_tfr, int_hexagon_A2_tfr>; +def : T_I_pat <A2_tfrsi, int_hexagon_A2_tfrsi>; + +// Assembler mapped from Rdd32=Rss32 to Rdd32=combine(Rss.H32,Rss.L32) +def : Pat<(int_hexagon_A2_tfrp DoubleRegs:$src), + (A2_combinew (HiReg DoubleRegs:$src), (LoReg DoubleRegs:$src))>; -// MTYPE / ALU / Add and accumulate. -def HEXAGON_M2_acci: - si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>; -def HEXAGON_M2_accii: - si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>; -def HEXAGON_M2_nacci: - si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>; -def HEXAGON_M2_naccii: - si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>; +/******************************************************************** +* ALU32/PERM * +*********************************************************************/ +// Combine +def: T_RR_pat<A2_combine_hh, int_hexagon_A2_combine_hh>; +def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>; +def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>; +def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>; + +def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>; -// MTYPE / ALU / Subtract and accumulate. -def HEXAGON_M2_subacc: - si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>; +def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))), + (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>; -// MTYPE / ALU / Vector absolute difference. -def HEXAGON_M2_vabsdiffh: - di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>; -def HEXAGON_M2_vabsdiffw: - di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>; +// Mux +def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>; +def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>; +def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>; -// MTYPE / ALU / XOR and xor with destination.
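Several of the ALU32/ALU patterns above cover intrinsics that have no dedicated instruction by reusing an equivalent form, mirroring the assembler mappings: not(Rs) becomes sub(#-1,Rs), neg(Rs) becomes sub(#0,Rs), and the 64-bit register copy tfrp becomes a combine of its two halves. The C2_mux pattern shows another recurring idiom: intrinsics pass predicates around as plain i32 values, so the selection pattern must insert a register-to-predicate transfer first. The sketch below copies that pattern from this hunk, with only the comments added:

// $Rp arrives as an i32 (for example, the result of int_hexagon_C2_cmpgt);
// C2_tfrrp moves it into a predicate register before the real C2_mux uses it.
def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))),
         (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>;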
-def HEXAGON_M2_xor_xacc: - si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>; +// Shift halfword +def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>; +def : T_R_pat<A2_asrh, int_hexagon_A2_asrh>; +def : T_R_pat<A2_asrh, int_hexagon_SI_to_SXTHI_asrh>; +// Sign/zero extend +def : T_R_pat<A2_sxth, int_hexagon_A2_sxth>; +def : T_R_pat<A2_sxtb, int_hexagon_A2_sxtb>; +def : T_R_pat<A2_zxth, int_hexagon_A2_zxth>; +def : T_R_pat<A2_zxtb, int_hexagon_A2_zxtb>; /******************************************************************** -* MTYPE/COMPLEX * +* ALU32/PRED * *********************************************************************/ +// Compare +def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>; +def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>; +def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>; -// MTYPE / COMPLEX / Complex multiply. -// Rdd[-+]=cmpy(Rs, Rt:<<1]:sat -def HEXAGON_M2_cmpys_s1: - di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>; -def HEXAGON_M2_cmpys_s0: - di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>; -def HEXAGON_M2_cmpysc_s1: - di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>; -def HEXAGON_M2_cmpysc_s0: - di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>; - -def HEXAGON_M2_cmacs_s1: - di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>; -def HEXAGON_M2_cmacs_s0: - di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>; -def HEXAGON_M2_cmacsc_s1: - di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>; -def HEXAGON_M2_cmacsc_s0: - di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>; - -def HEXAGON_M2_cnacs_s1: - di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>; -def HEXAGON_M2_cnacs_s0: - di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>; -def HEXAGON_M2_cnacsc_s1: - di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>; -def HEXAGON_M2_cnacsc_s0: - di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>; - -// MTYPE / COMPLEX / Complex multiply real or imaginary. -def HEXAGON_M2_cmpyr_s0: - di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>; -def HEXAGON_M2_cmacr_s0: - di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>; - -def HEXAGON_M2_cmpyi_s0: - di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>; -def HEXAGON_M2_cmaci_s0: - di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>; - -// MTYPE / COMPLEX / Complex multiply with round and pack. -// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat -def HEXAGON_M2_cmpyrs_s0: - si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>; -def HEXAGON_M2_cmpyrs_s1: - si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>; - -def HEXAGON_M2_cmpyrsc_s0: - si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>; -def HEXAGON_M2_cmpyrsc_s1: - si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>; - -//MTYPE / COMPLEX / Vector complex multiply real or imaginary. -def HEXAGON_M2_vcmpy_s0_sat_i: - di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>; -def HEXAGON_M2_vcmpy_s1_sat_i: - di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>; - -def HEXAGON_M2_vcmpy_s0_sat_r: - di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>; -def HEXAGON_M2_vcmpy_s1_sat_r: - di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>; - -def HEXAGON_M2_vcmac_s0_sat_i: - di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>; -def HEXAGON_M2_vcmac_s0_sat_r: - di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>; - -//MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary. 
-def HEXAGON_M2_vrcmpyi_s0: - di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>; -def HEXAGON_M2_vrcmpyr_s0: - di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>; - -def HEXAGON_M2_vrcmpyi_s0c: - di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>; -def HEXAGON_M2_vrcmpyr_s0c: - di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>; - -def HEXAGON_M2_vrcmaci_s0: - di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>; -def HEXAGON_M2_vrcmacr_s0: - di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>; - -def HEXAGON_M2_vrcmaci_s0c: - di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>; -def HEXAGON_M2_vrcmacr_s0c: - di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>; +def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>; +def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>; +def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>; +def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)), + (i32 (C2_cmpgti (I32:$src1), + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; -/******************************************************************** -* MTYPE/MPYH * -*********************************************************************/ +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)), + (i32 (C2_cmpgtui (I32:$src1), + (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>; + +// The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0. +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)), + (i32 (C2_cmpeq (I32:$src1), (I32:$src1)))>; -// MTYPE / MPYH / Multiply and use lower result. -//def HEXAGON_M2_mpysmi: -//FIXME: Hexagon_M2_mpysmi should really by of the type si_MInst_sim9, -// not si_MInst_sis9 - but for now, we will use s9. -// def Hexagon_M2_mpysmi: -// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>; -def Hexagon_M2_mpysmi: - si_MInst_sis9 <"mpyi", int_hexagon_M2_mpysmi>; -def HEXAGON_M2_mpyi: - si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>; -def HEXAGON_M2_mpyui: - si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>; -def HEXAGON_M2_macsip: - si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>; -def HEXAGON_M2_maci: - si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>; -def HEXAGON_M2_macsin: - si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>; - -// MTYPE / MPYH / Multiply word by half (32x16). 
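The compare-immediate patterns above also synthesize comparisons the hardware lacks: there is no cmp.ge-with-immediate instruction, so Rs >= #s8 is selected as the strict compare Rs > #s8-1 (DEC_CONST_SIGNED and DEC_CONST_UNSIGNED, defined elsewhere in this backend, perform the decrement at selection time), and the always-true unsigned case cmp.geu(Rs,#0) is materialized as cmp.eq(Rs,Rs). As a worked instance, cmp.ge(Rs,#5) and cmp.gt(Rs,#4) denote the same predicate for every value of Rs. The signed pattern, copied from this hunk with a comment added:

// Rs >= #s8  <=>  Rs > #s8-1, so reuse C2_cmpgti with a decremented immediate.
def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)),
           (i32 (C2_cmpgti (I32:$src1),
                           (DEC_CONST_SIGNED s32ImmPred:$src2)))>;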
-//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat] -//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat] -def HEXAGON_M2_mmpyl_rs1: - di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>; -def HEXAGON_M2_mmpyl_s1: - di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>; -def HEXAGON_M2_mmpyl_rs0: - di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>; -def HEXAGON_M2_mmpyl_s0: - di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>; -def HEXAGON_M2_mmpyh_rs1: - di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>; -def HEXAGON_M2_mmpyh_s1: - di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>; -def HEXAGON_M2_mmpyh_rs0: - di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>; -def HEXAGON_M2_mmpyh_s0: - di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>; -def HEXAGON_M2_mmacls_rs1: - di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>; -def HEXAGON_M2_mmacls_s1: - di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>; -def HEXAGON_M2_mmacls_rs0: - di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>; -def HEXAGON_M2_mmacls_s0: - di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>; -def HEXAGON_M2_mmachs_rs1: - di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>; -def HEXAGON_M2_mmachs_s1: - di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>; -def HEXAGON_M2_mmachs_rs0: - di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>; -def HEXAGON_M2_mmachs_s0: - di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>; - -// MTYPE / MPYH / Multiply word by unsigned half (32x16). -//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat] -//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat] -def HEXAGON_M2_mmpyul_rs1: - di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>; -def HEXAGON_M2_mmpyul_s1: - di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>; -def HEXAGON_M2_mmpyul_rs0: - di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>; -def HEXAGON_M2_mmpyul_s0: - di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>; -def HEXAGON_M2_mmpyuh_rs1: - di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>; -def HEXAGON_M2_mmpyuh_s1: - di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>; -def HEXAGON_M2_mmpyuh_rs0: - di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>; -def HEXAGON_M2_mmpyuh_s0: - di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>; -def HEXAGON_M2_mmaculs_rs1: - di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>; -def HEXAGON_M2_mmaculs_s1: - di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>; -def HEXAGON_M2_mmaculs_rs0: - di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>; -def HEXAGON_M2_mmaculs_s0: - di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>; -def HEXAGON_M2_mmacuhs_rs1: - di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>; -def HEXAGON_M2_mmacuhs_s1: - di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>; -def HEXAGON_M2_mmacuhs_rs0: - di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>; -def HEXAGON_M2_mmacuhs_s0: - di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>; - -// MTYPE / MPYH / Multiply and use upper result. 
-def HEXAGON_M2_hmmpyh_rs1: - si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>; -def HEXAGON_M2_hmmpyl_rs1: - si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>; -def HEXAGON_M2_mpy_up: - si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>; -def HEXAGON_M2_dpmpyss_rnd_s0: - si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>; -def HEXAGON_M2_mpyu_up: - si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>; - -// MTYPE / MPYH / Multiply and use full result. -def HEXAGON_M2_dpmpyuu_s0: - di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>; -def HEXAGON_M2_dpmpyuu_acc_s0: - di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>; -def HEXAGON_M2_dpmpyuu_nac_s0: - di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>; -def HEXAGON_M2_dpmpyss_s0: - di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>; -def HEXAGON_M2_dpmpyss_acc_s0: - di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>; -def HEXAGON_M2_dpmpyss_nac_s0: - di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>; +def : Pat <(i32 (int_hexagon_C2_cmplt (I32:$src1), + (I32:$src2))), + (i32 (C2_cmpgt (I32:$src2), (I32:$src1)))>; +def : Pat <(i32 (int_hexagon_C2_cmpltu (I32:$src1), + (I32:$src2))), + (i32 (C2_cmpgtu (I32:$src2), (I32:$src1)))>; /******************************************************************** -* MTYPE/MPYS * +* ALU32/VH * *********************************************************************/ +// Vector add, subtract, average halfwords +def: T_RR_pat<A2_svaddh, int_hexagon_A2_svaddh>; +def: T_RR_pat<A2_svaddhs, int_hexagon_A2_svaddhs>; +def: T_RR_pat<A2_svadduhs, int_hexagon_A2_svadduhs>; -// MTYPE / MPYS / Scalar 16x16 multiply signed. -//Rd=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]| -// [:<<0[:rnd|:sat|:rnd:sat]|:<<1[:rnd|:sat|:rnd:sat]]] -def HEXAGON_M2_mpy_hh_s0: - si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>; -def HEXAGON_M2_mpy_hh_s1: - si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>; -def HEXAGON_M2_mpy_rnd_hh_s1: - si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>; -def HEXAGON_M2_mpy_sat_rnd_hh_s1: - si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>; -def HEXAGON_M2_mpy_sat_hh_s1: - si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>; -def HEXAGON_M2_mpy_rnd_hh_s0: - si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>; -def HEXAGON_M2_mpy_sat_rnd_hh_s0: - si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>; -def HEXAGON_M2_mpy_sat_hh_s0: - si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>; - -def HEXAGON_M2_mpy_hl_s0: - si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>; -def HEXAGON_M2_mpy_hl_s1: - si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>; -def HEXAGON_M2_mpy_rnd_hl_s1: - si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>; -def HEXAGON_M2_mpy_sat_rnd_hl_s1: - si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>; -def HEXAGON_M2_mpy_sat_hl_s1: - si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>; -def HEXAGON_M2_mpy_rnd_hl_s0: - si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>; -def HEXAGON_M2_mpy_sat_rnd_hl_s0: - si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>; -def HEXAGON_M2_mpy_sat_hl_s0: - si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>; - -def HEXAGON_M2_mpy_lh_s0: - si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>; -def HEXAGON_M2_mpy_lh_s1: - si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>; -def HEXAGON_M2_mpy_rnd_lh_s1: - si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>; -def 
HEXAGON_M2_mpy_sat_rnd_lh_s1: - si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>; -def HEXAGON_M2_mpy_sat_lh_s1: - si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>; -def HEXAGON_M2_mpy_rnd_lh_s0: - si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>; -def HEXAGON_M2_mpy_sat_rnd_lh_s0: - si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>; -def HEXAGON_M2_mpy_sat_lh_s0: - si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>; - -def HEXAGON_M2_mpy_ll_s0: - si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>; -def HEXAGON_M2_mpy_ll_s1: - si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>; -def HEXAGON_M2_mpy_rnd_ll_s1: - si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>; -def HEXAGON_M2_mpy_sat_rnd_ll_s1: - si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>; -def HEXAGON_M2_mpy_sat_ll_s1: - si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>; -def HEXAGON_M2_mpy_rnd_ll_s0: - si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>; -def HEXAGON_M2_mpy_sat_rnd_ll_s0: - si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>; -def HEXAGON_M2_mpy_sat_ll_s0: - si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>; - -//Rdd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:rnd|:<<1:rnd]] -def HEXAGON_M2_mpyd_hh_s0: - di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>; -def HEXAGON_M2_mpyd_hh_s1: - di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>; -def HEXAGON_M2_mpyd_rnd_hh_s1: - di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>; -def HEXAGON_M2_mpyd_rnd_hh_s0: - di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>; - -def HEXAGON_M2_mpyd_hl_s0: - di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>; -def HEXAGON_M2_mpyd_hl_s1: - di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>; -def HEXAGON_M2_mpyd_rnd_hl_s1: - di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>; -def HEXAGON_M2_mpyd_rnd_hl_s0: - di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>; - -def HEXAGON_M2_mpyd_lh_s0: - di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>; -def HEXAGON_M2_mpyd_lh_s1: - di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>; -def HEXAGON_M2_mpyd_rnd_lh_s1: - di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>; -def HEXAGON_M2_mpyd_rnd_lh_s0: - di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>; - -def HEXAGON_M2_mpyd_ll_s0: - di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>; -def HEXAGON_M2_mpyd_ll_s1: - di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>; -def HEXAGON_M2_mpyd_rnd_ll_s1: - di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>; -def HEXAGON_M2_mpyd_rnd_ll_s0: - di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>; - -//Rx+=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]] -def HEXAGON_M2_mpy_acc_hh_s0: - si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>; -def HEXAGON_M2_mpy_acc_hh_s1: - si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>; -def HEXAGON_M2_mpy_acc_sat_hh_s1: - si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>; -def HEXAGON_M2_mpy_acc_sat_hh_s0: - si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>; - -def HEXAGON_M2_mpy_acc_hl_s0: - si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>; -def HEXAGON_M2_mpy_acc_hl_s1: - si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>; -def HEXAGON_M2_mpy_acc_sat_hl_s1: - si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>; -def 
HEXAGON_M2_mpy_acc_sat_hl_s0: - si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>; - -def HEXAGON_M2_mpy_acc_lh_s0: - si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>; -def HEXAGON_M2_mpy_acc_lh_s1: - si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>; -def HEXAGON_M2_mpy_acc_sat_lh_s1: - si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>; -def HEXAGON_M2_mpy_acc_sat_lh_s0: - si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>; - -def HEXAGON_M2_mpy_acc_ll_s0: - si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>; -def HEXAGON_M2_mpy_acc_ll_s1: - si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>; -def HEXAGON_M2_mpy_acc_sat_ll_s1: - si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>; -def HEXAGON_M2_mpy_acc_sat_ll_s0: - si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>; - -//Rx-=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]] -def HEXAGON_M2_mpy_nac_hh_s0: - si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>; -def HEXAGON_M2_mpy_nac_hh_s1: - si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>; -def HEXAGON_M2_mpy_nac_sat_hh_s1: - si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>; -def HEXAGON_M2_mpy_nac_sat_hh_s0: - si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>; - -def HEXAGON_M2_mpy_nac_hl_s0: - si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>; -def HEXAGON_M2_mpy_nac_hl_s1: - si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>; -def HEXAGON_M2_mpy_nac_sat_hl_s1: - si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>; -def HEXAGON_M2_mpy_nac_sat_hl_s0: - si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>; - -def HEXAGON_M2_mpy_nac_lh_s0: - si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>; -def HEXAGON_M2_mpy_nac_lh_s1: - si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>; -def HEXAGON_M2_mpy_nac_sat_lh_s1: - si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>; -def HEXAGON_M2_mpy_nac_sat_lh_s0: - si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>; - -def HEXAGON_M2_mpy_nac_ll_s0: - si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>; -def HEXAGON_M2_mpy_nac_ll_s1: - si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>; -def HEXAGON_M2_mpy_nac_sat_ll_s1: - si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>; -def HEXAGON_M2_mpy_nac_sat_ll_s0: - si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>; - -//Rx+=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1] -def HEXAGON_M2_mpyd_acc_hh_s0: - di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>; -def HEXAGON_M2_mpyd_acc_hh_s1: - di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>; - -def HEXAGON_M2_mpyd_acc_hl_s0: - di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>; -def HEXAGON_M2_mpyd_acc_hl_s1: - di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>; - -def HEXAGON_M2_mpyd_acc_lh_s0: - di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>; -def HEXAGON_M2_mpyd_acc_lh_s1: - di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>; - -def HEXAGON_M2_mpyd_acc_ll_s0: - di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>; -def HEXAGON_M2_mpyd_acc_ll_s1: - di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>; - -//Rx-=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1] -def HEXAGON_M2_mpyd_nac_hh_s0: - 
di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>; -def HEXAGON_M2_mpyd_nac_hh_s1: - di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>; - -def HEXAGON_M2_mpyd_nac_hl_s0: - di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>; -def HEXAGON_M2_mpyd_nac_hl_s1: - di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>; - -def HEXAGON_M2_mpyd_nac_lh_s0: - di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>; -def HEXAGON_M2_mpyd_nac_lh_s1: - di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>; - -def HEXAGON_M2_mpyd_nac_ll_s0: - di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>; -def HEXAGON_M2_mpyd_nac_ll_s1: - di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>; - -// MTYPE / MPYS / Scalar 16x16 multiply unsigned. -//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def HEXAGON_M2_mpyu_hh_s0: - si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>; -def HEXAGON_M2_mpyu_hh_s1: - si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>; -def HEXAGON_M2_mpyu_hl_s0: - si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>; -def HEXAGON_M2_mpyu_hl_s1: - si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>; -def HEXAGON_M2_mpyu_lh_s0: - si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>; -def HEXAGON_M2_mpyu_lh_s1: - si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>; -def HEXAGON_M2_mpyu_ll_s0: - si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>; -def HEXAGON_M2_mpyu_ll_s1: - si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>; - -//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def HEXAGON_M2_mpyud_hh_s0: - di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>; -def HEXAGON_M2_mpyud_hh_s1: - di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>; -def HEXAGON_M2_mpyud_hl_s0: - di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>; -def HEXAGON_M2_mpyud_hl_s1: - di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>; -def HEXAGON_M2_mpyud_lh_s0: - di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>; -def HEXAGON_M2_mpyud_lh_s1: - di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>; -def HEXAGON_M2_mpyud_ll_s0: - di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>; -def HEXAGON_M2_mpyud_ll_s1: - di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>; - -//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def HEXAGON_M2_mpyu_acc_hh_s0: - si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>; -def HEXAGON_M2_mpyu_acc_hh_s1: - si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>; -def HEXAGON_M2_mpyu_acc_hl_s0: - si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>; -def HEXAGON_M2_mpyu_acc_hl_s1: - si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>; -def HEXAGON_M2_mpyu_acc_lh_s0: - si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>; -def HEXAGON_M2_mpyu_acc_lh_s1: - si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>; -def HEXAGON_M2_mpyu_acc_ll_s0: - si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>; -def HEXAGON_M2_mpyu_acc_ll_s1: - si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>; - -//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def HEXAGON_M2_mpyu_nac_hh_s0: - si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>; -def HEXAGON_M2_mpyu_nac_hh_s1: - si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>; -def HEXAGON_M2_mpyu_nac_hl_s0: - si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>; -def HEXAGON_M2_mpyu_nac_hl_s1: - si_MInst_sisisi_nac_hl_s1 <"mpyu", 
int_hexagon_M2_mpyu_nac_hl_s1>; -def HEXAGON_M2_mpyu_nac_lh_s0: - si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>; -def HEXAGON_M2_mpyu_nac_lh_s1: - si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>; -def HEXAGON_M2_mpyu_nac_ll_s0: - si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>; -def HEXAGON_M2_mpyu_nac_ll_s1: - si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>; - -//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def HEXAGON_M2_mpyud_acc_hh_s0: - di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>; -def HEXAGON_M2_mpyud_acc_hh_s1: - di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>; -def HEXAGON_M2_mpyud_acc_hl_s0: - di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>; -def HEXAGON_M2_mpyud_acc_hl_s1: - di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>; -def HEXAGON_M2_mpyud_acc_lh_s0: - di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>; -def HEXAGON_M2_mpyud_acc_lh_s1: - di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>; -def HEXAGON_M2_mpyud_acc_ll_s0: - di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>; -def HEXAGON_M2_mpyud_acc_ll_s1: - di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>; - -//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def HEXAGON_M2_mpyud_nac_hh_s0: - di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>; -def HEXAGON_M2_mpyud_nac_hh_s1: - di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>; -def HEXAGON_M2_mpyud_nac_hl_s0: - di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>; -def HEXAGON_M2_mpyud_nac_hl_s1: - di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>; -def HEXAGON_M2_mpyud_nac_lh_s0: - di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>; -def HEXAGON_M2_mpyud_nac_lh_s1: - di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>; -def HEXAGON_M2_mpyud_nac_ll_s0: - di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>; -def HEXAGON_M2_mpyud_nac_ll_s1: - di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>; +def: T_RR_pat<A2_svsubh, int_hexagon_A2_svsubh>; +def: T_RR_pat<A2_svsubhs, int_hexagon_A2_svsubhs>; +def: T_RR_pat<A2_svsubuhs, int_hexagon_A2_svsubuhs>; +def: T_RR_pat<A2_svavgh, int_hexagon_A2_svavgh>; +def: T_RR_pat<A2_svavghs, int_hexagon_A2_svavghs>; +def: T_RR_pat<A2_svnavgh, int_hexagon_A2_svnavgh>; /******************************************************************** -* MTYPE/VB * +* ALU64/ALU * *********************************************************************/ +def: T_RR_pat<A2_addsat, int_hexagon_A2_addsat>; +def: T_RR_pat<A2_subsat, int_hexagon_A2_subsat>; +def: T_PP_pat<A2_addp, int_hexagon_A2_addp>; +def: T_PP_pat<A2_subp, int_hexagon_A2_subp>; + +def: T_PP_pat<A2_andp, int_hexagon_A2_andp>; +def: T_PP_pat<A2_orp, int_hexagon_A2_orp>; +def: T_PP_pat<A2_xorp, int_hexagon_A2_xorp>; -// MTYPE / VB / Vector reduce add unsigned bytes. -def HEXAGON_A2_vraddub: - di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>; -def HEXAGON_A2_vraddub_acc: - di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>; +def: T_PP_pat<C2_cmpeqp, int_hexagon_C2_cmpeqp>; +def: T_PP_pat<C2_cmpgtp, int_hexagon_C2_cmpgtp>; +def: T_PP_pat<C2_cmpgtup, int_hexagon_C2_cmpgtup>; -// MTYPE / VB / Vector sum of absolute differences unsigned bytes. 
-def HEXAGON_A2_vrsadub: - di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>; -def HEXAGON_A2_vrsadub_acc: - di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>; +def: T_PP_pat<S2_parityp, int_hexagon_S2_parityp>; +def: T_RR_pat<S2_packhl, int_hexagon_S2_packhl>; /******************************************************************** -* MTYPE/VH * +* ALU64/VB * *********************************************************************/ +// ALU64 - Vector add +def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddub>; +def : T_PP_pat <A2_vaddubs, int_hexagon_A2_vaddubs>; +def : T_PP_pat <A2_vaddh, int_hexagon_A2_vaddh>; +def : T_PP_pat <A2_vaddhs, int_hexagon_A2_vaddhs>; +def : T_PP_pat <A2_vadduhs, int_hexagon_A2_vadduhs>; +def : T_PP_pat <A2_vaddw, int_hexagon_A2_vaddw>; +def : T_PP_pat <A2_vaddws, int_hexagon_A2_vaddws>; + +// ALU64 - Vector average +def : T_PP_pat <A2_vavgub, int_hexagon_A2_vavgub>; +def : T_PP_pat <A2_vavgubr, int_hexagon_A2_vavgubr>; +def : T_PP_pat <A2_vavgh, int_hexagon_A2_vavgh>; +def : T_PP_pat <A2_vavghr, int_hexagon_A2_vavghr>; +def : T_PP_pat <A2_vavghcr, int_hexagon_A2_vavghcr>; +def : T_PP_pat <A2_vavguh, int_hexagon_A2_vavguh>; +def : T_PP_pat <A2_vavguhr, int_hexagon_A2_vavguhr>; + +def : T_PP_pat <A2_vavgw, int_hexagon_A2_vavgw>; +def : T_PP_pat <A2_vavgwr, int_hexagon_A2_vavgwr>; +def : T_PP_pat <A2_vavgwcr, int_hexagon_A2_vavgwcr>; +def : T_PP_pat <A2_vavguw, int_hexagon_A2_vavguw>; +def : T_PP_pat <A2_vavguwr, int_hexagon_A2_vavguwr>; + +// ALU64 - Vector negative average +def : T_PP_pat <A2_vnavgh, int_hexagon_A2_vnavgh>; +def : T_PP_pat <A2_vnavghr, int_hexagon_A2_vnavghr>; +def : T_PP_pat <A2_vnavghcr, int_hexagon_A2_vnavghcr>; +def : T_PP_pat <A2_vnavgw, int_hexagon_A2_vnavgw>; +def : T_PP_pat <A2_vnavgwr, int_hexagon_A2_vnavgwr>; +def : T_PP_pat <A2_vnavgwcr, int_hexagon_A2_vnavgwcr>; + +// ALU64 - Vector max +def : T_PP_pat <A2_vmaxh, int_hexagon_A2_vmaxh>; +def : T_PP_pat <A2_vmaxw, int_hexagon_A2_vmaxw>; +def : T_PP_pat <A2_vmaxub, int_hexagon_A2_vmaxub>; +def : T_PP_pat <A2_vmaxuh, int_hexagon_A2_vmaxuh>; +def : T_PP_pat <A2_vmaxuw, int_hexagon_A2_vmaxuw>; + +// ALU64 - Vector min +def : T_PP_pat <A2_vminh, int_hexagon_A2_vminh>; +def : T_PP_pat <A2_vminw, int_hexagon_A2_vminw>; +def : T_PP_pat <A2_vminub, int_hexagon_A2_vminub>; +def : T_PP_pat <A2_vminuh, int_hexagon_A2_vminuh>; +def : T_PP_pat <A2_vminuw, int_hexagon_A2_vminuw>; + +// ALU64 - Vector sub +def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubub>; +def : T_PP_pat <A2_vsububs, int_hexagon_A2_vsububs>; +def : T_PP_pat <A2_vsubh, int_hexagon_A2_vsubh>; +def : T_PP_pat <A2_vsubhs, int_hexagon_A2_vsubhs>; +def : T_PP_pat <A2_vsubuhs, int_hexagon_A2_vsubuhs>; +def : T_PP_pat <A2_vsubw, int_hexagon_A2_vsubw>; +def : T_PP_pat <A2_vsubws, int_hexagon_A2_vsubws>; + +// ALU64 - Vector compare bytes +def : T_PP_pat <A2_vcmpbeq, int_hexagon_A2_vcmpbeq>; +def : T_PP_pat <A4_vcmpbgt, int_hexagon_A4_vcmpbgt>; +def : T_PP_pat <A2_vcmpbgtu, int_hexagon_A2_vcmpbgtu>; + +// ALU64 - Vector compare halfwords +def : T_PP_pat <A2_vcmpheq, int_hexagon_A2_vcmpheq>; +def : T_PP_pat <A2_vcmphgt, int_hexagon_A2_vcmphgt>; +def : T_PP_pat <A2_vcmphgtu, int_hexagon_A2_vcmphgtu>; + +// ALU64 - Vector compare words +def : T_PP_pat <A2_vcmpweq, int_hexagon_A2_vcmpweq>; +def : T_PP_pat <A2_vcmpwgt, int_hexagon_A2_vcmpwgt>; +def : T_PP_pat <A2_vcmpwgtu, int_hexagon_A2_vcmpwgtu>; -// MTYPE / VH / Vector dual multiply. 
-def HEXAGON_M2_vdmpys_s1: - di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>; -def HEXAGON_M2_vdmpys_s0: - di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>; -def HEXAGON_M2_vdmacs_s1: - di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>; -def HEXAGON_M2_vdmacs_s0: - di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>; - -// MTYPE / VH / Vector dual multiply with round and pack. -def HEXAGON_M2_vdmpyrs_s0: - si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>; -def HEXAGON_M2_vdmpyrs_s1: - si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>; - -// MTYPE / VH / Vector multiply even halfwords. -def HEXAGON_M2_vmpy2es_s1: - di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>; -def HEXAGON_M2_vmpy2es_s0: - di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>; -def HEXAGON_M2_vmac2es: - di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>; -def HEXAGON_M2_vmac2es_s1: - di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>; -def HEXAGON_M2_vmac2es_s0: - di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>; - -// MTYPE / VH / Vector multiply halfwords. -def HEXAGON_M2_vmpy2s_s0: - di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>; -def HEXAGON_M2_vmpy2s_s1: - di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>; -def HEXAGON_M2_vmac2: - di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>; -def HEXAGON_M2_vmac2s_s0: - di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>; -def HEXAGON_M2_vmac2s_s1: - di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>; - -// MTYPE / VH / Vector multiply halfwords with round and pack. -def HEXAGON_M2_vmpy2s_s0pack: - si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>; -def HEXAGON_M2_vmpy2s_s1pack: - si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>; - -// MTYPE / VH / Vector reduce multiply halfwords. -// Rxx32+=vrmpyh(Rss32,Rtt32) -def HEXAGON_M2_vrmpy_s0: - di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>; -def HEXAGON_M2_vrmac_s0: - di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>; - +// ALU64 / VB / Vector mux. 
+def : Pat<(int_hexagon_C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+          (C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+// MPY - Multiply and use full result
+// Rdd = mpy[u](Rs, Rt)
+def : T_RR_pat <M2_dpmpyss_s0, int_hexagon_M2_dpmpyss_s0>;
+def : T_RR_pat <M2_dpmpyuu_s0, int_hexagon_M2_dpmpyuu_s0>;
+
+// Complex multiply real or imaginary
+def : T_RR_pat <M2_cmpyi_s0, int_hexagon_M2_cmpyi_s0>;
+def : T_RR_pat <M2_cmpyr_s0, int_hexagon_M2_cmpyr_s0>;
+
+// Complex multiply
+def : T_RR_pat <M2_cmpys_s0, int_hexagon_M2_cmpys_s0>;
+def : T_RR_pat <M2_cmpysc_s0, int_hexagon_M2_cmpysc_s0>;
+def : T_RR_pat <M2_cmpys_s1, int_hexagon_M2_cmpys_s1>;
+def : T_RR_pat <M2_cmpysc_s1, int_hexagon_M2_cmpysc_s1>;
+
+// Vector multiply halfwords
+// Rdd=vmpyh(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2s_s0, int_hexagon_M2_vmpy2s_s0>;
+def : T_RR_pat <M2_vmpy2s_s1, int_hexagon_M2_vmpy2s_s1>;
+
+// Rxx[+-]= mpy[u](Rs,Rt)
+def : T_PRR_pat <M2_dpmpyss_acc_s0, int_hexagon_M2_dpmpyss_acc_s0>;
+def : T_PRR_pat <M2_dpmpyss_nac_s0, int_hexagon_M2_dpmpyss_nac_s0>;
+def : T_PRR_pat <M2_dpmpyuu_acc_s0, int_hexagon_M2_dpmpyuu_acc_s0>;
+def : T_PRR_pat <M2_dpmpyuu_nac_s0, int_hexagon_M2_dpmpyuu_nac_s0>;
+
+// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_cmacs_s0, int_hexagon_M2_cmacs_s0>;
+def : T_PRR_pat <M2_cnacs_s0, int_hexagon_M2_cnacs_s0>;
+def : T_PRR_pat <M2_cmacs_s1, int_hexagon_M2_cmacs_s1>;
+def : T_PRR_pat <M2_cnacs_s1, int_hexagon_M2_cnacs_s1>;
+
+// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat
+def : T_PRR_pat <M2_cmacsc_s0, int_hexagon_M2_cmacsc_s0>;
+def : T_PRR_pat <M2_cnacsc_s0, int_hexagon_M2_cnacsc_s0>;
+def : T_PRR_pat <M2_cmacsc_s1, int_hexagon_M2_cmacsc_s1>;
+def : T_PRR_pat <M2_cnacsc_s1, int_hexagon_M2_cnacsc_s1>;
+
+// Rxx+=cmpy[ir](Rs,Rt)
+def : T_PRR_pat <M2_cmaci_s0, int_hexagon_M2_cmaci_s0>;
+def : T_PRR_pat <M2_cmacr_s0, int_hexagon_M2_cmacr_s0>;
+
+// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
+def : T_PRR_pat <M2_vmac2, int_hexagon_M2_vmac2>;
+def : T_PRR_pat <M2_vmac2s_s0, int_hexagon_M2_vmac2s_s0>;
+def : T_PRR_pat <M2_vmac2s_s1, int_hexagon_M2_vmac2s_s1>;
 
 /********************************************************************
-* STYPE/ALU                                                         *
+* CR                                                                *
 *********************************************************************/
+class qi_CRInst_qi_pat<InstHexagon Inst, Intrinsic IntID> :
+  Pat<(i32 (IntID IntRegs:$Rs)),
+      (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs))))>;
+
+class qi_CRInst_qiqi_pat<InstHexagon Inst, Intrinsic IntID> :
+  Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt)),
+      (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs), (C2_tfrrp IntRegs:$Rt))))>;
+
+def: qi_CRInst_qi_pat<C2_not, int_hexagon_C2_not>;
+def: qi_CRInst_qi_pat<C2_all8, int_hexagon_C2_all8>;
+def: qi_CRInst_qi_pat<C2_any8, int_hexagon_C2_any8>;
+
+def: qi_CRInst_qiqi_pat<C2_and, int_hexagon_C2_and>;
+def: qi_CRInst_qiqi_pat<C2_andn, int_hexagon_C2_andn>;
+def: qi_CRInst_qiqi_pat<C2_or, int_hexagon_C2_or>;
+def: qi_CRInst_qiqi_pat<C2_orn, int_hexagon_C2_orn>;
+def: qi_CRInst_qiqi_pat<C2_xor, int_hexagon_C2_xor>;
+
+// Multiply 32x32 and use lower result
+def : T_RRI_pat <M2_macsip, int_hexagon_M2_macsip>;
+def : T_RRI_pat <M2_macsin, int_hexagon_M2_macsin>;
+def : T_RRR_pat <M2_maci, int_hexagon_M2_maci>;
+
+// Subtract and accumulate
+def : T_RRR_pat <M2_subacc, int_hexagon_M2_subacc>;
+
+// Add and accumulate
+def : T_RRR_pat <M2_acci, int_hexagon_M2_acci>;
+def : T_RRR_pat <M2_nacci, int_hexagon_M2_nacci>;
+def : T_RRI_pat <M2_accii, int_hexagon_M2_accii>;
+def : T_RRI_pat <M2_naccii, int_hexagon_M2_naccii>;
+
+// XOR and XOR
with destination +def : T_RRR_pat <M2_xor_xacc, int_hexagon_M2_xor_xacc>; + +class MType_R32_pat <Intrinsic IntID, InstHexagon OutputInst> : + Pat <(IntID IntRegs:$src1, IntRegs:$src2), + (OutputInst IntRegs:$src1, IntRegs:$src2)>; -// STYPE / ALU / Absolute value. -def HEXAGON_A2_abs: - si_SInst_si <"abs", int_hexagon_A2_abs>; -def HEXAGON_A2_absp: - di_SInst_di <"abs", int_hexagon_A2_absp>; -def HEXAGON_A2_abssat: - si_SInst_si_sat <"abs", int_hexagon_A2_abssat>; +// Vector dual multiply with round and pack -// STYPE / ALU / Negate. -def HEXAGON_A2_negp: - di_SInst_di <"neg", int_hexagon_A2_negp>; -def HEXAGON_A2_negsat: - si_SInst_si_sat <"neg", int_hexagon_A2_negsat>; +def : Pat <(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2), + (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>; -// STYPE / ALU / Logical Not. -def HEXAGON_A2_notp: - di_SInst_di <"not", int_hexagon_A2_notp>; +def : Pat <(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2), + (M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2)>; -// STYPE / ALU / Sign extend word to doubleword. -def HEXAGON_A2_sxtw: - di_SInst_si <"sxtw", int_hexagon_A2_sxtw>; +// Vector multiply halfwords with round and pack +def : MType_R32_pat <int_hexagon_M2_vmpy2s_s0pack, M2_vmpy2s_s0pack>; +def : MType_R32_pat <int_hexagon_M2_vmpy2s_s1pack, M2_vmpy2s_s1pack>; + +// Multiply and use lower result +def : MType_R32_pat <int_hexagon_M2_mpyi, M2_mpyi>; +def : T_RI_pat<M2_mpysmi, int_hexagon_M2_mpysmi>; + +// Assembler mapped from Rd32=mpyui(Rs32,Rt32) to Rd32=mpyi(Rs32,Rt32) +def : MType_R32_pat <int_hexagon_M2_mpyui, M2_mpyi>; + +// Multiply and use upper result +def : MType_R32_pat <int_hexagon_M2_mpy_up, M2_mpy_up>; +def : MType_R32_pat <int_hexagon_M2_mpyu_up, M2_mpyu_up>; +def : MType_R32_pat <int_hexagon_M2_hmmpyh_rs1, M2_hmmpyh_rs1>; +def : MType_R32_pat <int_hexagon_M2_hmmpyl_rs1, M2_hmmpyl_rs1>; +def : MType_R32_pat <int_hexagon_M2_dpmpyss_rnd_s0, M2_dpmpyss_rnd_s0>; + +// Complex multiply with round and pack +// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat +def : MType_R32_pat <int_hexagon_M2_cmpyrs_s0, M2_cmpyrs_s0>; +def : MType_R32_pat <int_hexagon_M2_cmpyrs_s1, M2_cmpyrs_s1>; +def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s0, M2_cmpyrsc_s0>; +def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s1, M2_cmpyrsc_s1>; /******************************************************************** -* STYPE/BIT * +* STYPE/ALU * *********************************************************************/ +def : T_P_pat <A2_absp, int_hexagon_A2_absp>; +def : T_P_pat <A2_negp, int_hexagon_A2_negp>; +def : T_P_pat <A2_notp, int_hexagon_A2_notp>; -// STYPE / BIT / Count leading. -def HEXAGON_S2_cl0: - si_SInst_si <"cl0", int_hexagon_S2_cl0>; -def HEXAGON_S2_cl0p: - si_SInst_di <"cl0", int_hexagon_S2_cl0p>; -def HEXAGON_S2_cl1: - si_SInst_si <"cl1", int_hexagon_S2_cl1>; -def HEXAGON_S2_cl1p: - si_SInst_di <"cl1", int_hexagon_S2_cl1p>; -def HEXAGON_S2_clb: - si_SInst_si <"clb", int_hexagon_S2_clb>; -def HEXAGON_S2_clbp: - si_SInst_di <"clb", int_hexagon_S2_clbp>; -def HEXAGON_S2_clbnorm: - si_SInst_si <"normamt", int_hexagon_S2_clbnorm>; - -// STYPE / BIT / Count trailing. -def HEXAGON_S2_ct0: - si_SInst_si <"ct0", int_hexagon_S2_ct0>; -def HEXAGON_S2_ct1: - si_SInst_si <"ct1", int_hexagon_S2_ct1>; - -// STYPE / BIT / Compare bit mask. 
-def Hexagon_C2_bitsclr: - qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>; -def Hexagon_C2_bitsclri: - qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>; -def Hexagon_C2_bitsset: - qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>; - -// STYPE / BIT / Extract unsigned. -// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm]) -def HEXAGON_S2_extractu: - si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>; -def HEXAGON_S2_extractu_rp: - si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>; -def HEXAGON_S2_extractup: - di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>; -def HEXAGON_S2_extractup_rp: - di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>; - -// STYPE / BIT / Insert bitfield. -def Hexagon_S2_insert: - si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>; -def Hexagon_S2_insert_rp: - si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>; -def Hexagon_S2_insertp: - di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>; -def Hexagon_S2_insertp_rp: - di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>; - -// STYPE / BIT / Innterleave/deinterleave. -def Hexagon_S2_interleave: - di_SInst_di <"interleave", int_hexagon_S2_interleave>; -def Hexagon_S2_deinterleave: - di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>; - -// STYPE / BIT / Linear feedback-shift Iteration. -def Hexagon_S2_lfsp: - di_SInst_didi <"lfs", int_hexagon_S2_lfsp>; - -// STYPE / BIT / Bit reverse. -def Hexagon_S2_brev: - si_SInst_si <"brev", int_hexagon_S2_brev>; - -// STYPE / BIT / Set/Clear/Toggle Bit. -def HEXAGON_S2_setbit_i: - si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>; -def HEXAGON_S2_togglebit_i: - si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>; -def HEXAGON_S2_clrbit_i: - si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>; -def HEXAGON_S2_setbit_r: - si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>; -def HEXAGON_S2_togglebit_r: - si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>; -def HEXAGON_S2_clrbit_r: - si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>; - -// STYPE / BIT / Test Bit. -def HEXAGON_S2_tstbit_i: - qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>; -def HEXAGON_S2_tstbit_r: - qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>; +/******************************************************************** +* STYPE/BIT * +*********************************************************************/ +// Count leading/trailing +def: T_R_pat<S2_cl0, int_hexagon_S2_cl0>; +def: T_P_pat<S2_cl0p, int_hexagon_S2_cl0p>; +def: T_R_pat<S2_cl1, int_hexagon_S2_cl1>; +def: T_P_pat<S2_cl1p, int_hexagon_S2_cl1p>; +def: T_R_pat<S2_clb, int_hexagon_S2_clb>; +def: T_P_pat<S2_clbp, int_hexagon_S2_clbp>; +def: T_R_pat<S2_clbnorm, int_hexagon_S2_clbnorm>; +def: T_R_pat<S2_ct0, int_hexagon_S2_ct0>; +def: T_R_pat<S2_ct1, int_hexagon_S2_ct1>; + +// Compare bit mask +def: T_RR_pat<C2_bitsclr, int_hexagon_C2_bitsclr>; +def: T_RI_pat<C2_bitsclri, int_hexagon_C2_bitsclri>; +def: T_RR_pat<C2_bitsset, int_hexagon_C2_bitsset>; + +// Vector shuffle +def : T_PP_pat <S2_shuffeb, int_hexagon_S2_shuffeb>; +def : T_PP_pat <S2_shuffob, int_hexagon_S2_shuffob>; +def : T_PP_pat <S2_shuffeh, int_hexagon_S2_shuffeh>; +def : T_PP_pat <S2_shuffoh, int_hexagon_S2_shuffoh>; + +// Vector truncate +def : T_PP_pat <S2_vtrunewh, int_hexagon_S2_vtrunewh>; +def : T_PP_pat <S2_vtrunowh, int_hexagon_S2_vtrunowh>; + +// Linear feedback-shift Iteration. 
+def : T_PP_pat <S2_lfsp, int_hexagon_S2_lfsp>;
+
+// Vector splice
+def : T_PPQ_pat <S2_vsplicerb, int_hexagon_S2_vsplicerb>;
+def : T_PPI_pat <S2_vspliceib, int_hexagon_S2_vspliceib>;
+
+// Shift by immediate and add
+def : T_RRI_pat<S2_addasl_rrri, int_hexagon_S2_addasl_rrri>;
+
+// Extract bitfield
+def : T_PII_pat<S2_extractup, int_hexagon_S2_extractup>;
+def : T_RII_pat<S2_extractu, int_hexagon_S2_extractu>;
+def : T_RP_pat <S2_extractu_rp, int_hexagon_S2_extractu_rp>;
+def : T_PP_pat <S2_extractup_rp, int_hexagon_S2_extractup_rp>;
+
+// Insert bitfield
+def : Pat <(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2,
+            DoubleRegs:$src3),
+           (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>;
+
+def : Pat<(i64 (int_hexagon_S2_insertp_rp (I64:$src1),
+                (I64:$src2), (I64:$src3))),
+          (i64 (S2_insertp_rp (I64:$src1), (I64:$src2),
+                (I64:$src3)))>;
+
+def : Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2,
+           u5ImmPred:$src3, u5ImmPred:$src4),
+          (S2_insert IntRegs:$src1, IntRegs:$src2,
+           u5ImmPred:$src3, u5ImmPred:$src4)>;
+
+def : Pat<(i64 (int_hexagon_S2_insertp (I64:$src1),
+                (I64:$src2), u6ImmPred:$src3, u6ImmPred:$src4)),
+          (i64 (S2_insertp (I64:$src1), (I64:$src2),
+                u6ImmPred:$src3, u6ImmPred:$src4))>;
+
+
+// Interleave/deinterleave
+def : T_P_pat <S2_interleave, int_hexagon_S2_interleave>;
+def : T_P_pat <S2_deinterleave, int_hexagon_S2_deinterleave>;
+
+// Set/Clear/Toggle Bit
+def: T_RI_pat<S2_setbit_i, int_hexagon_S2_setbit_i>;
+def: T_RI_pat<S2_clrbit_i, int_hexagon_S2_clrbit_i>;
+def: T_RI_pat<S2_togglebit_i, int_hexagon_S2_togglebit_i>;
+
+def: T_RR_pat<S2_setbit_r, int_hexagon_S2_setbit_r>;
+def: T_RR_pat<S2_clrbit_r, int_hexagon_S2_clrbit_r>;
+def: T_RR_pat<S2_togglebit_r, int_hexagon_S2_togglebit_r>;
+
+// Test Bit
+def: T_RI_pat<S2_tstbit_i, int_hexagon_S2_tstbit_i>;
+def: T_RR_pat<S2_tstbit_r, int_hexagon_S2_tstbit_r>;
 
 /********************************************************************
 * STYPE/COMPLEX                                                     *
 *********************************************************************/
+// Vector Complex conjugate
+def : T_P_pat <A2_vconj, int_hexagon_A2_vconj>;
 
-// STYPE / COMPLEX / Vector Complex conjugate.
-def HEXAGON_A2_vconj:
-  di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>;
-
-// STYPE / COMPLEX / Vector Complex rotate.
-def HEXAGON_S2_vcrotate:
-  di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>;
-
+// Vector Complex rotate
+def : T_PR_pat <S2_vcrotate, int_hexagon_S2_vcrotate>;
 
 /********************************************************************
 * STYPE/PERM                                                        *
 *********************************************************************/
 
-// STYPE / PERM / Saturate.
-def HEXAGON_A2_sat:
-  si_SInst_di <"sat", int_hexagon_A2_sat>;
-def HEXAGON_A2_satb:
-  si_SInst_si <"satb", int_hexagon_A2_satb>;
-def HEXAGON_A2_sath:
-  si_SInst_si <"sath", int_hexagon_A2_sath>;
-def HEXAGON_A2_satub:
-  si_SInst_si <"satub", int_hexagon_A2_satub>;
-def HEXAGON_A2_satuh:
-  si_SInst_si <"satuh", int_hexagon_A2_satuh>;
-
-// STYPE / PERM / Swizzle bytes.
-def HEXAGON_A2_swiz:
-  si_SInst_si <"swiz", int_hexagon_A2_swiz>;
-
-// STYPE / PERM / Vector align.
-// Need custom lowering
-def HEXAGON_S2_valignib:
-  di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>;
-def HEXAGON_S2_valignrb:
-  di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>;
-
-// STYPE / PERM / Vector round and pack.
-def HEXAGON_S2_vrndpackwh: - si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>; -def HEXAGON_S2_vrndpackwhs: - si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>; - -// STYPE / PERM / Vector saturate and pack. -def HEXAGON_S2_svsathb: - si_SInst_si <"vsathb", int_hexagon_S2_svsathb>; -def HEXAGON_S2_vsathb: - si_SInst_di <"vsathb", int_hexagon_S2_vsathb>; -def HEXAGON_S2_svsathub: - si_SInst_si <"vsathub", int_hexagon_S2_svsathub>; -def HEXAGON_S2_vsathub: - si_SInst_di <"vsathub", int_hexagon_S2_vsathub>; -def HEXAGON_S2_vsatwh: - si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>; -def HEXAGON_S2_vsatwuh: - si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>; - -// STYPE / PERM / Vector saturate without pack. -def HEXAGON_S2_vsathb_nopack: - di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>; -def HEXAGON_S2_vsathub_nopack: - di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>; -def HEXAGON_S2_vsatwh_nopack: - di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>; -def HEXAGON_S2_vsatwuh_nopack: - di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>; - -// STYPE / PERM / Vector shuffle. -def HEXAGON_S2_shuffeb: - di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>; -def HEXAGON_S2_shuffeh: - di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>; -def HEXAGON_S2_shuffob: - di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>; -def HEXAGON_S2_shuffoh: - di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>; - -// STYPE / PERM / Vector splat bytes. -def HEXAGON_S2_vsplatrb: - si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>; - -// STYPE / PERM / Vector splat halfwords. -def HEXAGON_S2_vsplatrh: - di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>; - -// STYPE / PERM / Vector splice. -def Hexagon_S2_vsplicerb: - di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>; -def Hexagon_S2_vspliceib: - di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>; - -// STYPE / PERM / Sign extend. -def HEXAGON_S2_vsxtbh: - di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>; -def HEXAGON_S2_vsxthw: - di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>; - -// STYPE / PERM / Truncate. -def HEXAGON_S2_vtrunehb: - si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>; -def HEXAGON_S2_vtrunohb: - si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>; -def HEXAGON_S2_vtrunewh: - di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>; -def HEXAGON_S2_vtrunowh: - di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>; - -// STYPE / PERM / Zero extend. -def HEXAGON_S2_vzxtbh: - di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>; -def HEXAGON_S2_vzxthw: - di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>; - +// Vector saturate without pack +def : T_P_pat <S2_vsathb_nopack, int_hexagon_S2_vsathb_nopack>; +def : T_P_pat <S2_vsathub_nopack, int_hexagon_S2_vsathub_nopack>; +def : T_P_pat <S2_vsatwh_nopack, int_hexagon_S2_vsatwh_nopack>; +def : T_P_pat <S2_vsatwuh_nopack, int_hexagon_S2_vsatwuh_nopack>; /******************************************************************** * STYPE/PRED * *********************************************************************/ -// STYPE / PRED / Mask generate from predicate. -def HEXAGON_C2_mask: - di_SInst_qi <"mask", int_hexagon_C2_mask>; - -// STYPE / PRED / Predicate transfer. 
-def HEXAGON_C2_tfrpr: - si_SInst_qi <"", int_hexagon_C2_tfrpr>; -def HEXAGON_C2_tfrrp: - qi_SInst_si <"", int_hexagon_C2_tfrrp>; +// Predicate transfer +def: Pat<(i32 (int_hexagon_C2_tfrpr (I32:$Rs))), + (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>; +def: Pat<(i32 (int_hexagon_C2_tfrrp (I32:$Rs))), + (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>; -// STYPE / PRED / Viterbi pack even and odd predicate bits. -def HEXAGON_C2_vitpack: - si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>; +// Mask generate from predicate +def: Pat<(i64 (int_hexagon_C2_mask (I32:$Rs))), + (i64 (C2_mask (C2_tfrrp (I32:$Rs))))>; +// Viterbi pack even and odd predicate bits +def: Pat<(i32 (int_hexagon_C2_vitpack (I32:$Rs), (I32:$Rt))), + (i32 (C2_vitpack (C2_tfrrp (I32:$Rs)), + (C2_tfrrp (I32:$Rt))))>; /******************************************************************** * STYPE/SHIFT * *********************************************************************/ -// STYPE / SHIFT / Shift by immediate. -def HEXAGON_S2_asl_i_r: - si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>; -def HEXAGON_S2_asr_i_r: - si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>; -def HEXAGON_S2_lsr_i_r: - si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>; -def HEXAGON_S2_asl_i_p: - di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>; -def HEXAGON_S2_asr_i_p: - di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>; -def HEXAGON_S2_lsr_i_p: - di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>; - -// STYPE / SHIFT / Shift by immediate and accumulate. -def HEXAGON_S2_asl_i_r_acc: - si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>; -def HEXAGON_S2_asr_i_r_acc: - si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>; -def HEXAGON_S2_lsr_i_r_acc: - si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>; -def HEXAGON_S2_asl_i_r_nac: - si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>; -def HEXAGON_S2_asr_i_r_nac: - si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>; -def HEXAGON_S2_lsr_i_r_nac: - si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>; -def HEXAGON_S2_asl_i_p_acc: - di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>; -def HEXAGON_S2_asr_i_p_acc: - di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>; -def HEXAGON_S2_lsr_i_p_acc: - di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>; -def HEXAGON_S2_asl_i_p_nac: - di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>; -def HEXAGON_S2_asr_i_p_nac: - di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>; -def HEXAGON_S2_lsr_i_p_nac: - di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>; - -// STYPE / SHIFT / Shift by immediate and add. -def HEXAGON_S2_addasl_rrri: - si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>; - -// STYPE / SHIFT / Shift by immediate and logical. 
-def HEXAGON_S2_asl_i_r_and: - si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>; -def HEXAGON_S2_asr_i_r_and: - si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>; -def HEXAGON_S2_lsr_i_r_and: - si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>; - -def HEXAGON_S2_asl_i_r_xacc: - si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>; -def HEXAGON_S2_lsr_i_r_xacc: - si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>; - -def HEXAGON_S2_asl_i_r_or: - si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>; -def HEXAGON_S2_asr_i_r_or: - si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>; -def HEXAGON_S2_lsr_i_r_or: - si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>; - -def HEXAGON_S2_asl_i_p_and: - di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>; -def HEXAGON_S2_asr_i_p_and: - di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>; -def HEXAGON_S2_lsr_i_p_and: - di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>; - -def HEXAGON_S2_asl_i_p_xacc: - di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>; -def HEXAGON_S2_lsr_i_p_xacc: - di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>; - -def HEXAGON_S2_asl_i_p_or: - di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>; -def HEXAGON_S2_asr_i_p_or: - di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>; -def HEXAGON_S2_lsr_i_p_or: - di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>; - -// STYPE / SHIFT / Shift right by immediate with rounding. -def HEXAGON_S2_asr_i_r_rnd: - si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>; -def HEXAGON_S2_asr_i_r_rnd_goodsyntax: - si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>; - -// STYPE / SHIFT / Shift left by immediate with saturation. -def HEXAGON_S2_asl_i_r_sat: - si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>; - -// STYPE / SHIFT / Shift by register. -def HEXAGON_S2_asl_r_r: - si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>; -def HEXAGON_S2_asr_r_r: - si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>; -def HEXAGON_S2_lsl_r_r: - si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>; -def HEXAGON_S2_lsr_r_r: - si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>; -def HEXAGON_S2_asl_r_p: - di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>; -def HEXAGON_S2_asr_r_p: - di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>; -def HEXAGON_S2_lsl_r_p: - di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>; -def HEXAGON_S2_lsr_r_p: - di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>; - -// STYPE / SHIFT / Shift by register and accumulate. 
-def HEXAGON_S2_asl_r_r_acc: - si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>; -def HEXAGON_S2_asr_r_r_acc: - si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>; -def HEXAGON_S2_lsl_r_r_acc: - si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>; -def HEXAGON_S2_lsr_r_r_acc: - si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>; -def HEXAGON_S2_asl_r_p_acc: - di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>; -def HEXAGON_S2_asr_r_p_acc: - di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>; -def HEXAGON_S2_lsl_r_p_acc: - di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>; -def HEXAGON_S2_lsr_r_p_acc: - di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>; - -def HEXAGON_S2_asl_r_r_nac: - si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>; -def HEXAGON_S2_asr_r_r_nac: - si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>; -def HEXAGON_S2_lsl_r_r_nac: - si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>; -def HEXAGON_S2_lsr_r_r_nac: - si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>; -def HEXAGON_S2_asl_r_p_nac: - di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>; -def HEXAGON_S2_asr_r_p_nac: - di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>; -def HEXAGON_S2_lsl_r_p_nac: - di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>; -def HEXAGON_S2_lsr_r_p_nac: - di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>; - -// STYPE / SHIFT / Shift by register and logical. -def HEXAGON_S2_asl_r_r_and: - si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>; -def HEXAGON_S2_asr_r_r_and: - si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>; -def HEXAGON_S2_lsl_r_r_and: - si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>; -def HEXAGON_S2_lsr_r_r_and: - si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>; - -def HEXAGON_S2_asl_r_r_or: - si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>; -def HEXAGON_S2_asr_r_r_or: - si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>; -def HEXAGON_S2_lsl_r_r_or: - si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>; -def HEXAGON_S2_lsr_r_r_or: - si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>; - -def HEXAGON_S2_asl_r_p_and: - di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>; -def HEXAGON_S2_asr_r_p_and: - di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>; -def HEXAGON_S2_lsl_r_p_and: - di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>; -def HEXAGON_S2_lsr_r_p_and: - di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>; - -def HEXAGON_S2_asl_r_p_or: - di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>; -def HEXAGON_S2_asr_r_p_or: - di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>; -def HEXAGON_S2_lsl_r_p_or: - di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>; -def HEXAGON_S2_lsr_r_p_or: - di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>; - -// STYPE / SHIFT / Shift by register with saturation. -def HEXAGON_S2_asl_r_r_sat: - si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>; -def HEXAGON_S2_asr_r_r_sat: - si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>; - -// STYPE / SHIFT / Table Index. 
-def Hexagon_S2_tableidxb_goodsyntax:
-  si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>;
-def Hexagon_S2_tableidxd_goodsyntax:
-  si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>;
-def Hexagon_S2_tableidxh_goodsyntax:
-  si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>;
-def Hexagon_S2_tableidxw_goodsyntax:
-  si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>;
+def : T_PI_pat <S2_asr_i_p, int_hexagon_S2_asr_i_p>;
+def : T_PI_pat <S2_lsr_i_p, int_hexagon_S2_lsr_i_p>;
+def : T_PI_pat <S2_asl_i_p, int_hexagon_S2_asl_i_p>;
+
+def : T_PR_pat <S2_asr_r_p, int_hexagon_S2_asr_r_p>;
+def : T_PR_pat <S2_lsr_r_p, int_hexagon_S2_lsr_r_p>;
+def : T_PR_pat <S2_asl_r_p, int_hexagon_S2_asl_r_p>;
+def : T_PR_pat <S2_lsl_r_p, int_hexagon_S2_lsl_r_p>;
+
+def : T_RR_pat <S2_asr_r_r, int_hexagon_S2_asr_r_r>;
+def : T_RR_pat <S2_lsr_r_r, int_hexagon_S2_lsr_r_r>;
+def : T_RR_pat <S2_asl_r_r, int_hexagon_S2_asl_r_r>;
+def : T_RR_pat <S2_lsl_r_r, int_hexagon_S2_lsl_r_r>;
+
+def : T_RR_pat <S2_asr_r_r_sat, int_hexagon_S2_asr_r_r_sat>;
+def : T_RR_pat <S2_asl_r_r_sat, int_hexagon_S2_asl_r_r_sat>;
+
+def : T_R_pat <S2_vsxtbh, int_hexagon_S2_vsxtbh>;
+def : T_R_pat <S2_vzxtbh, int_hexagon_S2_vzxtbh>;
+def : T_R_pat <S2_vsxthw, int_hexagon_S2_vsxthw>;
+def : T_R_pat <S2_vzxthw, int_hexagon_S2_vzxthw>;
+def : T_R_pat <S2_vsplatrh, int_hexagon_S2_vsplatrh>;
+def : T_R_pat <A2_sxtw, int_hexagon_A2_sxtw>;
+
+// Vector saturate and pack
+def : T_R_pat <S2_svsathb, int_hexagon_S2_svsathb>;
+def : T_R_pat <S2_svsathub, int_hexagon_S2_svsathub>;
+def : T_P_pat <S2_vsathub, int_hexagon_S2_vsathub>;
+def : T_P_pat <S2_vsatwh, int_hexagon_S2_vsatwh>;
+def : T_P_pat <S2_vsatwuh, int_hexagon_S2_vsatwuh>;
+def : T_P_pat <S2_vsathb, int_hexagon_S2_vsathb>;
+
+def : T_P_pat <S2_vtrunohb, int_hexagon_S2_vtrunohb>;
+def : T_P_pat <S2_vtrunehb, int_hexagon_S2_vtrunehb>;
+def : T_P_pat <S2_vrndpackwh, int_hexagon_S2_vrndpackwh>;
+def : T_P_pat <S2_vrndpackwhs, int_hexagon_S2_vrndpackwhs>;
+def : T_R_pat <S2_brev, int_hexagon_S2_brev>;
+def : T_R_pat <S2_vsplatrb, int_hexagon_S2_vsplatrb>;
+
+def : T_R_pat <A2_abs, int_hexagon_A2_abs>;
+def : T_R_pat <A2_abssat, int_hexagon_A2_abssat>;
+def : T_R_pat <A2_negsat, int_hexagon_A2_negsat>;
+
+def : T_R_pat <A2_swiz, int_hexagon_A2_swiz>;
+
+def : T_P_pat <A2_sat, int_hexagon_A2_sat>;
+def : T_R_pat <A2_sath, int_hexagon_A2_sath>;
+def : T_R_pat <A2_satuh, int_hexagon_A2_satuh>;
+def : T_R_pat <A2_satub, int_hexagon_A2_satub>;
+def : T_R_pat <A2_satb, int_hexagon_A2_satb>;
+
+// Vector arithmetic shift right by immediate with truncate and pack.
+def : T_PI_pat<S2_asr_i_svw_trun, int_hexagon_S2_asr_i_svw_trun>;
+
+def : T_RI_pat <S2_asr_i_r, int_hexagon_S2_asr_i_r>;
+def : T_RI_pat <S2_lsr_i_r, int_hexagon_S2_lsr_i_r>;
+def : T_RI_pat <S2_asl_i_r, int_hexagon_S2_asl_i_r>;
+def : T_RI_pat <S2_asr_i_r_rnd, int_hexagon_S2_asr_i_r_rnd>;
+def : T_RI_pat <S2_asr_i_r_rnd_goodsyntax,
+                int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
+
+// Shift left by immediate with saturation.
+def : T_RI_pat <S2_asl_i_r_sat, int_hexagon_S2_asl_i_r_sat>;
+//===----------------------------------------------------------------------===//
+// Template 'def pat' to map tableidx[bhwd] intrinsics to :raw instructions.
+//===----------------------------------------------------------------------===//
+class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst,
+                         SDNodeXForm XformImm>
+  : Pat <(IntID IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3, u5ImmPred:$src4),
+         (OutputInst IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3,
+                     (XformImm u5ImmPred:$src4))>;
+
+
+// Table Index : Extract and insert bits.
+// Map to the real hardware instructions after subtracting appropriate
+// values from the 4th input operand. Please note that subtraction is not
+// needed for int_hexagon_S2_tableidxb_goodsyntax.
+
+def : Pat <(int_hexagon_S2_tableidxb_goodsyntax IntRegs:$src1, IntRegs:$src2,
+            u4ImmPred:$src3, u5ImmPred:$src4),
+           (S2_tableidxb IntRegs:$src1, IntRegs:$src2,
+            u4ImmPred:$src3, u5ImmPred:$src4)>;
+
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax, S2_tableidxh,
+                         DEC_CONST_SIGNED>;
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw,
+                         DEC2_CONST_SIGNED>;
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxd_goodsyntax, S2_tableidxd,
+                         DEC3_CONST_SIGNED>;
 
 /********************************************************************
 * STYPE/VH                                                          *
 *********************************************************************/
-// STYPE / VH / Vector absolute value halfwords.
-// Rdd64=vabsh(Rss64)
-def HEXAGON_A2_vabsh:
-  di_SInst_di <"vabsh", int_hexagon_A2_vabsh>;
-def HEXAGON_A2_vabshsat:
-  di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>;
-
-// STYPE / VH / Vector shift halfwords by immediate.
-// Rdd64=v[asl/asr/lsr]h(Rss64,Rt32)
-def HEXAGON_S2_asl_i_vh:
-  di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>;
-def HEXAGON_S2_asr_i_vh:
-  di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>;
-def HEXAGON_S2_lsr_i_vh:
-  di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>;
-
-// STYPE / VH / Vector shift halfwords by register.
-// Rdd64=v[asl/asr/lsl/lsr]h(Rss64,Rt32)
-def HEXAGON_S2_asl_r_vh:
-  di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>;
-def HEXAGON_S2_asr_r_vh:
-  di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>;
-def HEXAGON_S2_lsl_r_vh:
-  di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>;
-def HEXAGON_S2_lsr_r_vh:
-  di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>;
+// Vector absolute value halfwords with and without saturation
+// Rdd64=vabsh(Rss64)[:sat]
+def : T_P_pat <A2_vabsh, int_hexagon_A2_vabsh>;
+def : T_P_pat <A2_vabshsat, int_hexagon_A2_vabshsat>;
+
+// Vector shift halfwords by immediate
+// Rdd64=[vaslh/vasrh/vlsrh](Rss64,u4)
+def : T_PI_pat <S2_asr_i_vh, int_hexagon_S2_asr_i_vh>;
+def : T_PI_pat <S2_lsr_i_vh, int_hexagon_S2_lsr_i_vh>;
+def : T_PI_pat <S2_asl_i_vh, int_hexagon_S2_asl_i_vh>;
+
+// Vector shift halfwords by register
+// Rdd64=[vaslh/vasrh/vlslh/vlsrh](Rss64,Rt32)
+def : T_PR_pat <S2_asr_r_vh, int_hexagon_S2_asr_r_vh>;
+def : T_PR_pat <S2_lsr_r_vh, int_hexagon_S2_lsr_r_vh>;
+def : T_PR_pat <S2_asl_r_vh, int_hexagon_S2_asl_r_vh>;
+def : T_PR_pat <S2_lsl_r_vh, int_hexagon_S2_lsl_r_vh>;
 
 /********************************************************************
 * STYPE/VW                                                          *
 *********************************************************************/
-// STYPE / VW / Vector absolute value words.
-def HEXAGON_A2_vabsw:
-  di_SInst_di <"vabsw", int_hexagon_A2_vabsw>;
-def HEXAGON_A2_vabswsat:
-  di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>;
-
-// STYPE / VW / Vector shift words by immediate.
-// Rdd64=v[asl/vsl]w(Rss64,Rt32)
-def HEXAGON_S2_asl_i_vw:
-  di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>;
-def HEXAGON_S2_asr_i_vw:
-  di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>;
-def HEXAGON_S2_lsr_i_vw:
-  di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>;
-
-// STYPE / VW / Vector shift words by register.
-// Rdd64=v[asl/vsl]w(Rss64,Rt32)
-def HEXAGON_S2_asl_r_vw:
-  di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>;
-def HEXAGON_S2_asr_r_vw:
-  di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>;
-def HEXAGON_S2_lsl_r_vw:
-  di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>;
-def HEXAGON_S2_lsr_r_vw:
-  di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>;
-
-// STYPE / VW / Vector shift words with truncate and pack.
-def HEXAGON_S2_asr_r_svw_trun:
-  si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>;
-def HEXAGON_S2_asr_i_svw_trun:
-  si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>;
-
-// LD / Circular loads.
-def HEXAGON_circ_ldd:
-  di_LDInstPI_diu4 <"circ_ldd", int_hexagon_circ_ldd>;
+// Vector absolute value words with and without saturation
+def : T_P_pat <A2_vabsw, int_hexagon_A2_vabsw>;
+def : T_P_pat <A2_vabswsat, int_hexagon_A2_vabswsat>;
+
+// Vector shift words by immediate.
+// Rdd64=[vasrw/vlsrw|vaslw](Rss64,u5)
+def : T_PI_pat <S2_asr_i_vw, int_hexagon_S2_asr_i_vw>;
+def : T_PI_pat <S2_lsr_i_vw, int_hexagon_S2_lsr_i_vw>;
+def : T_PI_pat <S2_asl_i_vw, int_hexagon_S2_asl_i_vw>;
+
+// Vector shift words by register.
+// Rdd64=[vasrw/vlsrw|vaslw|vlslw](Rss64,Rt32)
+def : T_PR_pat <S2_asr_r_vw, int_hexagon_S2_asr_r_vw>;
+def : T_PR_pat <S2_lsr_r_vw, int_hexagon_S2_lsr_r_vw>;
+def : T_PR_pat <S2_asl_r_vw, int_hexagon_S2_asl_r_vw>;
+def : T_PR_pat <S2_lsl_r_vw, int_hexagon_S2_lsl_r_vw>;
+
+// Vector shift words with truncate and pack
+
+def : T_PR_pat <S2_asr_r_svw_trun, int_hexagon_S2_asr_r_svw_trun>;
+
+def : T_R_pat<L2_loadw_locked, int_hexagon_L2_loadw_locked>;
+def : T_R_pat<L4_loadd_locked, int_hexagon_L4_loadd_locked>;
+
+def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))),
+         (i32 (C2_tfrpr (S2_storew_locked (I32:$Rs), (I32:$Rt))))>;
+def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))),
+         (i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>;
+
+/********************************************************************
+* ST
+*********************************************************************/
+
+class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val>
+  : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru),
+        (MI I32:$Rs, Val:$Rt, I32:$Ru)>;
+
+def : T_stb_pat <S2_storerh_pbr_pseudo, int_hexagon_brev_sth, I32>;
+def : T_stb_pat <S2_storerb_pbr_pseudo, int_hexagon_brev_stb, I32>;
+def : T_stb_pat <S2_storeri_pbr_pseudo, int_hexagon_brev_stw, I32>;
+def : T_stb_pat <S2_storerf_pbr_pseudo, int_hexagon_brev_sthhi, I32>;
+def : T_stb_pat <S2_storerd_pbr_pseudo, int_hexagon_brev_std, I64>;
+
+class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
+  : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
+        (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>;
+
+def: T_stc_pat<S2_storerb_pci_pseudo, int_hexagon_circ_stb, s4_0ImmPred, I32>;
+def: T_stc_pat<S2_storerh_pci_pseudo, int_hexagon_circ_sth, s4_1ImmPred, I32>;
+def: T_stc_pat<S2_storeri_pci_pseudo, int_hexagon_circ_stw, s4_2ImmPred, I32>;
+def: T_stc_pat<S2_storerd_pci_pseudo, int_hexagon_circ_std, s4_3ImmPred, I64>;
+def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
 
 include "HexagonIntrinsicsV3.td"
 include "HexagonIntrinsicsV4.td"
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
index df89378603a4..4c28b28337f4 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -14,8 +14,8 @@
 def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
            (i64
             (A2_combinew
-             (HEXAGON_M2_maci
-              (HEXAGON_M2_maci
+             (M2_maci
+              (M2_maci
                (i32
                 (EXTRACT_SUBREG
                  (i64
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
index 2a54e62d20ae..6152cb098825 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV3.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
@@ -11,40 +11,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-
-
-
-// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
-def Hexagon_M2_vrcmpys_s1:
-  di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>;
-def Hexagon_M2_vrcmpys_acc_s1:
-  di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>;
-def Hexagon_M2_vrcmpys_s1rp:
-  si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>;
-
-
-
-
-/********************************************************************
-* MTYPE/VB                                                          *
-*********************************************************************/
-
-// MTYPE / VB / Vector reduce add unsigned bytes.
-def Hexagon_M2_vradduh:
-  si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>;
-
-
-/********************************************************************
-* ALU64/ALU                                                         *
-*********************************************************************/
-
-// ALU64 / ALU / Add.
-def Hexagon_A2_addsp:
-  di_ALU64_sidi <"add", int_hexagon_A2_addsp>;
-def Hexagon_A2_addpsat:
-  di_ALU64_didi <"add", int_hexagon_A2_addpsat>;
-
-def Hexagon_A2_maxp:
-  di_ALU64_didi <"max", int_hexagon_A2_maxp>;
-def Hexagon_A2_maxup:
-  di_ALU64_didi <"maxu", int_hexagon_A2_maxup>;
+// Vector reduce complex multiply real or imaginary
+def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
+def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
+def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
+
+// Vector reduce add unsigned halfwords
+def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
+
+def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
+def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
+def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
+def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
+def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
+def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
index 77b148b9f2bd..c80a188d82e7 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -12,359 +12,307 @@
 // 80-V9418-12 Rev. A
 // June 15, 2010
 
+// Vector reduce multiply word by signed half (32x16)
+//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
+def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
+
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
+def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
+
+//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
+
+//Rdd+=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
+
+// Vector multiply halfwords, signed by unsigned
+// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
+def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
+
+// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
+def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
+// Rxx[^]=vpmpyh(Rs,Rt)
+def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
+// Rxx^=pmpyw(Rs,Rt)
+def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
+
+//Rxx^=asr(Rss,Rt)
+def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
+//Rxx^=asl(Rss,Rt)
+def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
+//Rxx^=lsr(Rss,Rt)
+def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
+//Rxx^=lsl(Rss,Rt)
+def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
+
+// Multiply and use upper result
+def : MType_R32_pat <int_hexagon_M2_mpysu_up, M2_mpysu_up>;
+def : MType_R32_pat <int_hexagon_M2_mpy_up_s1, M2_mpy_up_s1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyh_s1, M2_hmmpyh_s1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyl_s1, M2_hmmpyl_s1>;
+def : MType_R32_pat <int_hexagon_M2_mpy_up_s1_sat, M2_mpy_up_s1_sat>;
+
+// Vector reduce add halfwords
+def : Pat <(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2),
+           (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>;
+
+def : T_P_pat <S2_brevp, int_hexagon_S2_brevp>;
+
+def: T_P_pat <S2_ct0p, int_hexagon_S2_ct0p>;
+def: T_P_pat <S2_ct1p, int_hexagon_S2_ct1p>;
+def: T_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>;
+def: T_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>;
+def: T_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
+
+
+class vcmpImm_pat <InstHexagon MI, Intrinsic IntID, PatLeaf immPred> :
+  Pat <(IntID (i64 DoubleRegs:$src1), immPred:$src2),
+       (MI (i64 DoubleRegs:$src1), immPred:$src2)>;
+
+def : vcmpImm_pat <A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi, u8ImmPred>;
+def : vcmpImm_pat <A4_vcmpbgti, int_hexagon_A4_vcmpbgti, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui, u7ImmPred>;
+
+def : vcmpImm_pat <A4_vcmpheqi, int_hexagon_A4_vcmpheqi, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmphgti, int_hexagon_A4_vcmphgti, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmphgtui, int_hexagon_A4_vcmphgtui, u7ImmPred>;
+
+def : vcmpImm_pat <A4_vcmpweqi, int_hexagon_A4_vcmpweqi, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmpwgti, int_hexagon_A4_vcmpwgti, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui, u7ImmPred>;
+
+def : T_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>;
+
+def : T_RR_pat<A4_cmpbeq,
int_hexagon_A4_cmpbeq>; +def : T_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>; +def : T_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>; +def : T_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>; +def : T_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>; +def : T_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>; + +def : T_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>; +def : T_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>; +def : T_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>; + +def : T_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>; +def : T_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>; +def : T_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>; + +def : T_RP_pat <A4_boundscheck, int_hexagon_A4_boundscheck>; + +def : T_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>; + +def : Pat <(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>; +def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>; +def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>; +def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>; +// Multiply 32x32 and use upper result +def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>; +def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>; + +// Complex multiply 32x16 +def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>; +def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>; + +def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>; +def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>; + +def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>; +def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>; + +// Complex add/sub halfwords/words +def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>; +def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>; +def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>; +def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>; + +def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>; +def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>; + +// Extract bitfield +def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>; +def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>; +def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>; +def : T_RII_pat <S4_extract, int_hexagon_S4_extract>; + +// Vector conditional negate +// Rdd=vcnegh(Rss,Rt) +def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>; + +// Shift an immediate left by register amount +def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>; + +// Vector reduce maximum halfwords +def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>; +def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>; -// -// ALU 32 types. 
-// - -class si_ALU32_sisi_not<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_ALU32_s8si<string opc, Intrinsic IntID> - : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")), - [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>; +// Vector reduce maximum words +def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>; +def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>; -class di_ALU32_sis8<string opc, Intrinsic IntID> - : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; +// Vector reduce minimum halfwords +def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>; +def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>; -class qi_neg_ALU32_sisi<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; +// Vector reduce minimum words +def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>; +def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>; -class qi_neg_ALU32_sis10<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), - !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; +// Rotate and reduce bytes +def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, + u2ImmPred:$src3), + (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2ImmPred:$src3)>; + +// Rotate and reduce bytes with accumulation +// Rxx+=vrcrotate(Rss,Rt,#u2) +def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3, u2ImmPred:$src4), + (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3, u2ImmPred:$src4)>; + +// Vector conditional negate +def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>; -class qi_neg_ALU32_siu9<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2), - !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; +// Logical xor with xor accumulation +def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>; + +// ALU64 - Vector min/max byte +def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>; +def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>; + +// Shift and add/sub/and/or +def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>; +def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>; +def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>; +def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>; +def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>; +def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>; +def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>; +def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>; -class si_neg_ALU32_sisi<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_neg_ALU32_sis8<string opc, Intrinsic IntID> - : 
ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), - !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_ALU32_sis8<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - - -// -// SInst Classes. -// -class qi_neg_SInst_qiqi<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, and($src2, !$src3)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, and($src2, $src3)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, or($src2, !$src3)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, or($src2, $src3)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_SInst_si_addsis6<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, add($src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - imm:$src3))]>; - -class si_SInst_si_subs6si<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, sub(#$src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, - IntRegs:$src3))]>; - -class di_ALU64_didi_neg<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class di_MInst_dididi_xacc<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_and<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_MInst_sisisi_andn<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst &= ", !strconcat(opc , 
"($src2, ~$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_SInst_sisis10_andi<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, and($src2, #$src3))")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - imm:$src3))]>; - -class si_MInst_sisisi_xor<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_MInst_sisisi_xorn<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_SInst_sisis10_or<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3), - !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - imm:$src3))]>; - -class si_MInst_sisisi_or<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_MInst_sisisi_orn<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_SInst_siu5_sat<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; +// Split bitfield +def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>; +def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>; +def: T_RR_pat<S4_parity, int_hexagon_S4_parity>; + +def: T_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>; +def: T_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>; + +def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>; +def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>; +def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>; /******************************************************************** * ALU32/ALU * *********************************************************************/ // ALU32 / ALU / Logical Operations. -def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>; -def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>; - +def: T_RR_pat<A4_andn, int_hexagon_A4_andn>; +def: T_RR_pat<A4_orn, int_hexagon_A4_orn>; /******************************************************************** * ALU32/PERM * *********************************************************************/ -// ALU32 / PERM / Combine Words Into Doublewords. -def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>; -def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>; - +// Combine Words Into Doublewords. 
+def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32ImmPred>;
+def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32ImmPred>;
 
 /********************************************************************
 * ALU32/PRED *
 *********************************************************************/
 
-// ALU32 / PRED / Conditional Shift Halfword.
-// ALU32 / PRED / Conditional Sign Extend.
-// ALU32 / PRED / Conditional Zero Extend.
-// ALU32 / PRED / Compare.
-def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>;
-def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
-def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>;
+// Compare
+def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32ImmPred>;
+def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32ImmPred>;
+def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32ImmPred>;
 
-def: T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi>;
-def: T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei>;
-def: T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui>;
-
-// ALU32 / PRED / cmpare To General Register.
-def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>;
-def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>;
-def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>;
-def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>;
+def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
+def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
+def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>;
+def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
 
 /********************************************************************
 * CR *
 *********************************************************************/
 
-// CR / Corner Detection Acceleration.
-def Hexagon_C4_fastcorner9:
-  qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>;
-def Hexagon_C4_fastcorner9_not:
-  qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>;
-
 // CR / Logical Operations On Predicates.
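// The deleted defs below modeled these predicate intrinsics as fresh pseudo
// instructions; the replacement qi_CRInst_qiqiqi_pat class instead selects
// onto the real CR instructions, bridging the i32 view that intrinsics have
// of predicate values via C2_tfrrp (general register to predicate) and
// C2_tfrpr (predicate to general register). The two-operand analogue used by
// the V5 file further down is, as a sketch under the same assumption:
//
//   class qi_CRInst_qiqi_pat<InstHexagon Inst, Intrinsic IntID> :
//     Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt)),
//         (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs),
//                              (C2_tfrrp IntRegs:$Rt))))>;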
-def Hexagon_C4_and_andn:
-  qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>;
-def Hexagon_C4_and_and:
-  qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>;
-def Hexagon_C4_and_orn:
-  qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>;
-def Hexagon_C4_and_or:
-  qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>;
-def Hexagon_C4_or_andn:
-  qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>;
-def Hexagon_C4_or_and:
-  qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>;
-def Hexagon_C4_or_orn:
-  qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>;
-def Hexagon_C4_or_or:
-  qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>;
+class qi_CRInst_qiqiqi_pat<Intrinsic IntID, InstHexagon Inst> :
+  Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt, IntRegs:$Ru)),
+      (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs),
+                           (C2_tfrrp IntRegs:$Rt),
+                           (C2_tfrrp IntRegs:$Ru))))>;
+
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_and, C4_and_and>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_andn, C4_and_andn>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_or, C4_and_or>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_orn, C4_and_orn>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_and, C4_or_and>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_andn, C4_or_andn>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_or, C4_or_or>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_orn, C4_or_orn>;
 
 /********************************************************************
 * XTYPE/ALU *
 *********************************************************************/
 
-// XTYPE / ALU / Add And Accumulate.
-def Hexagon_S4_addaddi:
-  si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>;
-def Hexagon_S4_subaddi:
-  si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>;
+// Add And Accumulate.
 
-// XTYPE / ALU / Logical Doublewords.
-def Hexagon_S4_andnp:
-  di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>;
-def Hexagon_S4_ornp:
-  di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>;
+def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
+def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
 
-// XTYPE / ALU / Logical-logical Doublewords.
-def Hexagon_M4_xor_xacc:
-  di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>;
 
 // XTYPE / ALU / Logical-logical Words.
-def HEXAGON_M4_and_and:
-  si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>;
-def HEXAGON_M4_and_or:
-  si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>;
-def HEXAGON_M4_and_xor:
-  si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>;
-def HEXAGON_M4_and_andn:
-  si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>;
-def HEXAGON_M4_xor_and:
-  si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>;
-def HEXAGON_M4_xor_or:
-  si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>;
-def HEXAGON_M4_xor_andn:
-  si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>;
-def HEXAGON_M4_or_and:
-  si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>;
-def HEXAGON_M4_or_or:
-  si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>;
-def HEXAGON_M4_or_xor:
-  si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>;
-def HEXAGON_M4_or_andn:
-  si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>;
-def HEXAGON_S4_or_andix:
-  si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>;
-def HEXAGON_S4_or_andi:
-  si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>;
-def HEXAGON_S4_or_ori:
-  si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>;
-
-// XTYPE / ALU / Modulo wrap.
-def HEXAGON_A4_modwrapu:
-  si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>;
-
-// XTYPE / ALU / Round.
-def HEXAGON_A4_cround_ri: - si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>; -def HEXAGON_A4_cround_rr: - si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>; -def HEXAGON_A4_round_ri: - si_SInst_siu5 <"round", int_hexagon_A4_round_ri>; -def HEXAGON_A4_round_rr: - si_SInst_sisi <"round", int_hexagon_A4_round_rr>; -def HEXAGON_A4_round_ri_sat: - si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>; -def HEXAGON_A4_round_rr_sat: - si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>; - -// XTYPE / ALU / Vector reduce add unsigned halfwords. -// XTYPE / ALU / Vector add bytes. -// XTYPE / ALU / Vector conditional negate. -// XTYPE / ALU / Vector maximum bytes. -// XTYPE / ALU / Vector reduce maximum halfwords. -// XTYPE / ALU / Vector reduce maximum words. -// XTYPE / ALU / Vector minimum bytes. -// XTYPE / ALU / Vector reduce minimum halfwords. -// XTYPE / ALU / Vector reduce minimum words. -// XTYPE / ALU / Vector subtract bytes. - - -/******************************************************************** -* XTYPE/BIT * -*********************************************************************/ - -// XTYPE / BIT / Count leading. -// XTYPE / BIT / Count trailing. -// XTYPE / BIT / Extract bitfield. -// XTYPE / BIT / Masked parity. -// XTYPE / BIT / Bit reverse. -// XTYPE / BIT / Split bitfield. - - -/******************************************************************** -* XTYPE/COMPLEX * -*********************************************************************/ - -// XTYPE / COMPLEX / Complex add/sub halfwords. -// XTYPE / COMPLEX / Complex add/sub words. -// XTYPE / COMPLEX / Complex multiply 32x16. -// XTYPE / COMPLEX / Vector reduce complex rotate. - - -/******************************************************************** -* XTYPE/MPY * -*********************************************************************/ - -// XTYPE / COMPLEX / Complex add/sub halfwords. +def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>; +def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>; +def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>; +def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>; +def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>; +def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>; +def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>; +def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>; +def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>; +def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>; +def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>; + +def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>; +def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>; +def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>; + +// Modulo wrap. 
+def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>; + +// Arithmetic/Convergent round +// Rd=[cround|round](Rs,Rt)[:sat] +// Rd=[cround|round](Rs,#u5)[:sat] +def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>; +def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>; + +def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>; +def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>; + +def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>; +def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>; + +def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>; diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/lib/Target/Hexagon/HexagonIntrinsicsV5.td index 1d44b526d298..60e6b1eb4479 100644 --- a/lib/Target/Hexagon/HexagonIntrinsicsV5.td +++ b/lib/Target/Hexagon/HexagonIntrinsicsV5.td @@ -1,395 +1,111 @@ -class sf_SInst_sf<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1))]>; - -class si_SInst_sf<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1))]>; - -class sf_SInst_si<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1))]>; - -class sf_SInst_di<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>; - -class sf_SInst_df<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>; - -class si_SInst_df<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>; - -class df_SInst_sf<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; - -class di_SInst_sf<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; - -class df_SInst_si<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; - -class df_SInst_df<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>; - -class di_SInst_df<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>; - - -class df_SInst_di<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>; - -class sf_MInst_sfsf<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, 
(IntID IntRegs:$src1, IntRegs:$src2))]>; - -class df_MInst_dfdf<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class qi_ALU64_dfdf<string opc, Intrinsic IntID> - : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class qi_ALU64_dfu5<string opc, Intrinsic IntID> - : ALU64_ri<(outs PredRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - - -class sf_MInst_sfsfsf_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$dst2), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, - IntRegs:$src2, IntRegs:$dst2))], - "$dst2 = $dst">; - -class sf_MInst_sfsfsf_nac<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$dst2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, - IntRegs:$src2, IntRegs:$dst2))], - "$dst2 = $dst">; - - -class sf_MInst_sfsfsfsi_sc<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2, IntRegs:$src3), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2, $src3):scale")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2, IntRegs:$src3))], - "$dst2 = $dst">; - -class sf_MInst_sfsfsf_acc_lib<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$dst2), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2):lib")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, - IntRegs:$src2, IntRegs:$dst2))], - "$dst2 = $dst">; - -class sf_MInst_sfsfsf_nac_lib<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$dst2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1, $src2):lib")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, - IntRegs:$src2, IntRegs:$dst2))], - "$dst2 = $dst">; - -class df_MInst_dfdfdf_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, - DoubleRegs:$dst2), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2, DoubleRegs:$dst2))], - "$dst2 = $dst">; - -class df_MInst_dfdfdf_nac<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, - DoubleRegs:$dst2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2, DoubleRegs:$dst2))], - "$dst2 = $dst">; - - -class df_MInst_dfdfdfsi_sc<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2, IntRegs:$src3), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2, $src3):scale")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2, IntRegs:$src3))], - "$dst2 = $dst">; - -class df_MInst_dfdfdf_acc_lib<string opc, Intrinsic IntID> - : MInst_acc<(outs 
DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, - DoubleRegs:$dst2), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2):lib")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2, DoubleRegs:$dst2))], - "$dst2 = $dst">; - -class df_MInst_dfdfdf_nac_lib<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, - DoubleRegs:$dst2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1, $src2):lib")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2, DoubleRegs:$dst2))], - "$dst2 = $dst">; - -class qi_SInst_sfsf<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class qi_SInst_sfu5<string opc, Intrinsic IntID> - : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class sf_ALU64_u10_pos<string opc, Intrinsic IntID> - : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1), - !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")), - [(set IntRegs:$dst, (IntID imm:$src1))]>; - -class sf_ALU64_u10_neg<string opc, Intrinsic IntID> - : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1), - !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")), - [(set IntRegs:$dst, (IntID imm:$src1))]>; - -class df_ALU64_u10_pos<string opc, Intrinsic IntID> - : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1), - !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")), - [(set DoubleRegs:$dst, (IntID imm:$src1))]>; - -class df_ALU64_u10_neg<string opc, Intrinsic IntID> - : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1), - !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")), - [(set DoubleRegs:$dst, (IntID imm:$src1))]>; - -class di_MInst_diu6<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - -class di_MInst_diu4_rnd<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - -class si_MInst_diu4_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd:sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - -class si_SInst_diu4_sat<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - - -def HEXAGON_C4_fastcorner9: - qi_SInst_qiqi <"fastcorner9", int_hexagon_C4_fastcorner9>; -def HEXAGON_C4_fastcorner9_not: - qi_SInst_qiqi <"!fastcorner9", int_hexagon_C4_fastcorner9_not>; -def HEXAGON_M5_vrmpybuu: - di_MInst_didi <"vrmpybu", int_hexagon_M5_vrmpybuu>; -def HEXAGON_M5_vrmacbuu: - di_MInst_dididi_acc <"vrmpybu", int_hexagon_M5_vrmacbuu>; -def HEXAGON_M5_vrmpybsu: - di_MInst_didi <"vrmpybsu", int_hexagon_M5_vrmpybsu>; -def HEXAGON_M5_vrmacbsu: - di_MInst_dididi_acc <"vrmpybsu", int_hexagon_M5_vrmacbsu>; -def HEXAGON_M5_vmpybuu: - di_MInst_sisi <"vmpybu", 
int_hexagon_M5_vmpybuu>; -def HEXAGON_M5_vmpybsu: - di_MInst_sisi <"vmpybsu", int_hexagon_M5_vmpybsu>; -def HEXAGON_M5_vmacbuu: - di_MInst_disisi_acc <"vmpybu", int_hexagon_M5_vmacbuu>; -def HEXAGON_M5_vmacbsu: - di_MInst_disisi_acc <"vmpybsu", int_hexagon_M5_vmacbsu>; -def HEXAGON_M5_vdmpybsu: - di_MInst_didi_sat <"vdmpybsu", int_hexagon_M5_vdmpybsu>; -def HEXAGON_M5_vdmacbsu: - di_MInst_dididi_acc_sat <"vdmpybsu", int_hexagon_M5_vdmacbsu>; -def HEXAGON_A5_vaddhubs: - si_SInst_didi_sat <"vaddhub", int_hexagon_A5_vaddhubs>; -def HEXAGON_S5_popcountp: - si_SInst_di <"popcount", int_hexagon_S5_popcountp>; -def HEXAGON_S5_asrhub_rnd_sat_goodsyntax: - si_MInst_diu4_rnd_sat <"vasrhub", int_hexagon_S5_asrhub_rnd_sat_goodsyntax>; -def HEXAGON_S5_asrhub_sat: - si_SInst_diu4_sat <"vasrhub", int_hexagon_S5_asrhub_sat>; -def HEXAGON_S5_vasrhrnd_goodsyntax: - di_MInst_diu4_rnd <"vasrh", int_hexagon_S5_vasrhrnd_goodsyntax>; -def HEXAGON_S2_asr_i_p_rnd: - di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p_rnd>; -def HEXAGON_S2_asr_i_p_rnd_goodsyntax: - di_MInst_diu6 <"asrrnd", int_hexagon_S2_asr_i_p_rnd_goodsyntax>; -def HEXAGON_F2_sfadd: - sf_MInst_sfsf <"sfadd", int_hexagon_F2_sfadd>; -def HEXAGON_F2_sfsub: - sf_MInst_sfsf <"sfsub", int_hexagon_F2_sfsub>; -def HEXAGON_F2_sfmpy: - sf_MInst_sfsf <"sfmpy", int_hexagon_F2_sfmpy>; -def HEXAGON_F2_sffma: - sf_MInst_sfsfsf_acc <"sfmpy", int_hexagon_F2_sffma>; -def HEXAGON_F2_sffma_sc: - sf_MInst_sfsfsfsi_sc <"sfmpy", int_hexagon_F2_sffma_sc>; -def HEXAGON_F2_sffms: - sf_MInst_sfsfsf_nac <"sfmpy", int_hexagon_F2_sffms>; -def HEXAGON_F2_sffma_lib: - sf_MInst_sfsfsf_acc_lib <"sfmpy", int_hexagon_F2_sffma_lib>; -def HEXAGON_F2_sffms_lib: - sf_MInst_sfsfsf_nac_lib <"sfmpy", int_hexagon_F2_sffms_lib>; -def HEXAGON_F2_sfcmpeq: - qi_SInst_sfsf <"sfcmp.eq", int_hexagon_F2_sfcmpeq>; -def HEXAGON_F2_sfcmpgt: - qi_SInst_sfsf <"sfcmp.gt", int_hexagon_F2_sfcmpgt>; -def HEXAGON_F2_sfcmpge: - qi_SInst_sfsf <"sfcmp.ge", int_hexagon_F2_sfcmpge>; -def HEXAGON_F2_sfcmpuo: - qi_SInst_sfsf <"sfcmp.uo", int_hexagon_F2_sfcmpuo>; -def HEXAGON_F2_sfmax: - sf_MInst_sfsf <"sfmax", int_hexagon_F2_sfmax>; -def HEXAGON_F2_sfmin: - sf_MInst_sfsf <"sfmin", int_hexagon_F2_sfmin>; -def HEXAGON_F2_sfclass: - qi_SInst_sfu5 <"sfclass", int_hexagon_F2_sfclass>; -def HEXAGON_F2_sfimm_p: - sf_ALU64_u10_pos <"sfmake", int_hexagon_F2_sfimm_p>; -def HEXAGON_F2_sfimm_n: - sf_ALU64_u10_neg <"sfmake", int_hexagon_F2_sfimm_n>; -def HEXAGON_F2_sffixupn: - sf_MInst_sfsf <"sffixupn", int_hexagon_F2_sffixupn>; -def HEXAGON_F2_sffixupd: - sf_MInst_sfsf <"sffixupd", int_hexagon_F2_sffixupd>; -def HEXAGON_F2_sffixupr: - sf_SInst_sf <"sffixupr", int_hexagon_F2_sffixupr>; -def HEXAGON_F2_dfadd: - df_MInst_dfdf <"dfadd", int_hexagon_F2_dfadd>; -def HEXAGON_F2_dfsub: - df_MInst_dfdf <"dfsub", int_hexagon_F2_dfsub>; -def HEXAGON_F2_dfmpy: - df_MInst_dfdf <"dfmpy", int_hexagon_F2_dfmpy>; -def HEXAGON_F2_dffma: - df_MInst_dfdfdf_acc <"dfmpy", int_hexagon_F2_dffma>; -def HEXAGON_F2_dffms: - df_MInst_dfdfdf_nac <"dfmpy", int_hexagon_F2_dffms>; -def HEXAGON_F2_dffma_lib: - df_MInst_dfdfdf_acc_lib <"dfmpy", int_hexagon_F2_dffma_lib>; -def HEXAGON_F2_dffms_lib: - df_MInst_dfdfdf_nac_lib <"dfmpy", int_hexagon_F2_dffms_lib>; -def HEXAGON_F2_dffma_sc: - df_MInst_dfdfdfsi_sc <"dfmpy", int_hexagon_F2_dffma_sc>; -def HEXAGON_F2_dfmax: - df_MInst_dfdf <"dfmax", int_hexagon_F2_dfmax>; -def HEXAGON_F2_dfmin: - df_MInst_dfdf <"dfmin", int_hexagon_F2_dfmin>; -def HEXAGON_F2_dfcmpeq: - qi_ALU64_dfdf <"dfcmp.eq", int_hexagon_F2_dfcmpeq>; -def 
HEXAGON_F2_dfcmpgt: - qi_ALU64_dfdf <"dfcmp.gt", int_hexagon_F2_dfcmpgt>; -def HEXAGON_F2_dfcmpge: - qi_ALU64_dfdf <"dfcmp.ge", int_hexagon_F2_dfcmpge>; -def HEXAGON_F2_dfcmpuo: - qi_ALU64_dfdf <"dfcmp.uo", int_hexagon_F2_dfcmpuo>; -def HEXAGON_F2_dfclass: - qi_ALU64_dfu5 <"dfclass", int_hexagon_F2_dfclass>; -def HEXAGON_F2_dfimm_p: - df_ALU64_u10_pos <"dfmake", int_hexagon_F2_dfimm_p>; -def HEXAGON_F2_dfimm_n: - df_ALU64_u10_neg <"dfmake", int_hexagon_F2_dfimm_n>; -def HEXAGON_F2_dffixupn: - df_MInst_dfdf <"dffixupn", int_hexagon_F2_dffixupn>; -def HEXAGON_F2_dffixupd: - df_MInst_dfdf <"dffixupd", int_hexagon_F2_dffixupd>; -def HEXAGON_F2_dffixupr: - df_SInst_df <"dffixupr", int_hexagon_F2_dffixupr>; -def HEXAGON_F2_conv_sf2df: - df_SInst_sf <"convert_sf2df", int_hexagon_F2_conv_sf2df>; -def HEXAGON_F2_conv_df2sf: - sf_SInst_df <"convert_df2sf", int_hexagon_F2_conv_df2sf>; -def HEXAGON_F2_conv_uw2sf: - sf_SInst_si <"convert_uw2sf", int_hexagon_F2_conv_uw2sf>; -def HEXAGON_F2_conv_uw2df: - df_SInst_si <"convert_uw2df", int_hexagon_F2_conv_uw2df>; -def HEXAGON_F2_conv_w2sf: - sf_SInst_si <"convert_w2sf", int_hexagon_F2_conv_w2sf>; -def HEXAGON_F2_conv_w2df: - df_SInst_si <"convert_w2df", int_hexagon_F2_conv_w2df>; -def HEXAGON_F2_conv_ud2sf: - sf_SInst_di <"convert_ud2sf", int_hexagon_F2_conv_ud2sf>; -def HEXAGON_F2_conv_ud2df: - df_SInst_di <"convert_ud2df", int_hexagon_F2_conv_ud2df>; -def HEXAGON_F2_conv_d2sf: - sf_SInst_di <"convert_d2sf", int_hexagon_F2_conv_d2sf>; -def HEXAGON_F2_conv_d2df: - df_SInst_di <"convert_d2df", int_hexagon_F2_conv_d2df>; -def HEXAGON_F2_conv_sf2uw: - si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw>; -def HEXAGON_F2_conv_sf2w: - si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w>; -def HEXAGON_F2_conv_sf2ud: - di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud>; -def HEXAGON_F2_conv_sf2d: - di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d>; -def HEXAGON_F2_conv_df2uw: - si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw>; -def HEXAGON_F2_conv_df2w: - si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w>; -def HEXAGON_F2_conv_df2ud: - di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud>; -def HEXAGON_F2_conv_df2d: - di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d>; -def HEXAGON_F2_conv_sf2uw_chop: - si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw_chop>; -def HEXAGON_F2_conv_sf2w_chop: - si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w_chop>; -def HEXAGON_F2_conv_sf2ud_chop: - di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud_chop>; -def HEXAGON_F2_conv_sf2d_chop: - di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d_chop>; -def HEXAGON_F2_conv_df2uw_chop: - si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw_chop>; -def HEXAGON_F2_conv_df2w_chop: - si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w_chop>; -def HEXAGON_F2_conv_df2ud_chop: - di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud_chop>; -def HEXAGON_F2_conv_df2d_chop: - di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d_chop>; +//===- HexagonIntrinsicsV5.td - V5 Instruction intrinsics --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+//Rdd[+]=vrmpybsu(Rss,Rtt)
+//Rdd[+]=vrmpybuu(Rss,Rtt)
+let Predicates = [HasV5T] in {
+def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>;
+def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>;
+
+def : T_PP_pat <M5_vdmpybsu, int_hexagon_M5_vdmpybsu>;
+
+def : T_PPP_pat <M5_vrmacbsu, int_hexagon_M5_vrmacbsu>;
+def : T_PPP_pat <M5_vrmacbuu, int_hexagon_M5_vrmacbuu>;
+//Rxx+=vdmpybsu(Rss,Rtt):sat
+def : T_PPP_pat <M5_vdmacbsu, int_hexagon_M5_vdmacbsu>;
+
+// Vector multiply bytes
+// Rdd=vmpyb[s]u(Rs,Rt)
+def : T_RR_pat <M5_vmpybsu, int_hexagon_M5_vmpybsu>;
+def : T_RR_pat <M5_vmpybuu, int_hexagon_M5_vmpybuu>;
+
+// Rxx+=vmpyb[s]u(Rs,Rt)
+def : T_PRR_pat <M5_vmacbsu, int_hexagon_M5_vmacbsu>;
+def : T_PRR_pat <M5_vmacbuu, int_hexagon_M5_vmacbuu>;
+
+// Rd=vaddhub(Rss,Rtt):sat
+def : T_PP_pat <A5_vaddhubs, int_hexagon_A5_vaddhubs>;
+}
+
+def : T_FF_pat<F2_sfadd, int_hexagon_F2_sfadd>;
+def : T_FF_pat<F2_sfsub, int_hexagon_F2_sfsub>;
+def : T_FF_pat<F2_sfmpy, int_hexagon_F2_sfmpy>;
+def : T_FF_pat<F2_sfmax, int_hexagon_F2_sfmax>;
+def : T_FF_pat<F2_sfmin, int_hexagon_F2_sfmin>;
+
+def : T_FF_pat<F2_sffixupn, int_hexagon_F2_sffixupn>;
+def : T_FF_pat<F2_sffixupd, int_hexagon_F2_sffixupd>;
+def : T_F_pat <F2_sffixupr, int_hexagon_F2_sffixupr>;
+
+def: qi_CRInst_qiqi_pat<C4_fastcorner9, int_hexagon_C4_fastcorner9>;
+def: qi_CRInst_qiqi_pat<C4_fastcorner9_not, int_hexagon_C4_fastcorner9_not>;
+
+def : T_P_pat <S5_popcountp, int_hexagon_S5_popcountp>;
+def : T_PI_pat <S5_asrhub_sat, int_hexagon_S5_asrhub_sat>;
+
+def : T_PI_pat <S2_asr_i_p_rnd, int_hexagon_S2_asr_i_p_rnd>;
+def : T_PI_pat <S2_asr_i_p_rnd_goodsyntax,
+                int_hexagon_S2_asr_i_p_rnd_goodsyntax>;
+
+def : T_PI_pat <S5_asrhub_rnd_sat_goodsyntax,
+                int_hexagon_S5_asrhub_rnd_sat_goodsyntax>;
+
+def : T_PI_pat <S5_vasrhrnd_goodsyntax, int_hexagon_S5_vasrhrnd_goodsyntax>;
+
+def : T_FFF_pat <F2_sffma, int_hexagon_F2_sffma>;
+def : T_FFF_pat <F2_sffms, int_hexagon_F2_sffms>;
+def : T_FFF_pat <F2_sffma_lib, int_hexagon_F2_sffma_lib>;
+def : T_FFF_pat <F2_sffms_lib, int_hexagon_F2_sffms_lib>;
+def : T_FFFQ_pat <F2_sffma_sc, int_hexagon_F2_sffma_sc>;
+
+// Compare floating-point value
+def : T_FF_pat <F2_sfcmpge, int_hexagon_F2_sfcmpge>;
+def : T_FF_pat <F2_sfcmpuo, int_hexagon_F2_sfcmpuo>;
+def : T_FF_pat <F2_sfcmpeq, int_hexagon_F2_sfcmpeq>;
+def : T_FF_pat <F2_sfcmpgt, int_hexagon_F2_sfcmpgt>;
+
+def : T_DD_pat <F2_dfcmpeq, int_hexagon_F2_dfcmpeq>;
+def : T_DD_pat <F2_dfcmpgt, int_hexagon_F2_dfcmpgt>;
+def : T_DD_pat <F2_dfcmpge, int_hexagon_F2_dfcmpge>;
+def : T_DD_pat <F2_dfcmpuo, int_hexagon_F2_dfcmpuo>;
+
+// Create floating-point value
+def : T_I_pat <F2_sfimm_p, int_hexagon_F2_sfimm_p>;
+def : T_I_pat <F2_sfimm_n, int_hexagon_F2_sfimm_n>;
+def : T_I_pat <F2_dfimm_p, int_hexagon_F2_dfimm_p>;
+def : T_I_pat <F2_dfimm_n, int_hexagon_F2_dfimm_n>;
+
+def : T_DI_pat <F2_dfclass, int_hexagon_F2_dfclass>;
+def : T_FI_pat <F2_sfclass, int_hexagon_F2_sfclass>;
+def : T_F_pat <F2_conv_sf2df, int_hexagon_F2_conv_sf2df>;
+def : T_D_pat <F2_conv_df2sf, int_hexagon_F2_conv_df2sf>;
+def : T_R_pat <F2_conv_uw2sf, int_hexagon_F2_conv_uw2sf>;
+def : T_R_pat <F2_conv_uw2df, int_hexagon_F2_conv_uw2df>;
+def : T_R_pat <F2_conv_w2sf, int_hexagon_F2_conv_w2sf>;
+def : T_R_pat <F2_conv_w2df, int_hexagon_F2_conv_w2df>;
+def : T_P_pat <F2_conv_ud2sf, int_hexagon_F2_conv_ud2sf>;
+def : T_P_pat <F2_conv_ud2df, int_hexagon_F2_conv_ud2df>;
+def : T_P_pat <F2_conv_d2sf,
int_hexagon_F2_conv_d2sf>; +def : T_P_pat <F2_conv_d2df, int_hexagon_F2_conv_d2df>; +def : T_F_pat <F2_conv_sf2uw, int_hexagon_F2_conv_sf2uw>; +def : T_F_pat <F2_conv_sf2w, int_hexagon_F2_conv_sf2w>; +def : T_F_pat <F2_conv_sf2ud, int_hexagon_F2_conv_sf2ud>; +def : T_F_pat <F2_conv_sf2d, int_hexagon_F2_conv_sf2d>; +def : T_D_pat <F2_conv_df2uw, int_hexagon_F2_conv_df2uw>; +def : T_D_pat <F2_conv_df2w, int_hexagon_F2_conv_df2w>; +def : T_D_pat <F2_conv_df2ud, int_hexagon_F2_conv_df2ud>; +def : T_D_pat <F2_conv_df2d, int_hexagon_F2_conv_df2d>; +def : T_F_pat <F2_conv_sf2uw_chop, int_hexagon_F2_conv_sf2uw_chop>; +def : T_F_pat <F2_conv_sf2w_chop, int_hexagon_F2_conv_sf2w_chop>; +def : T_F_pat <F2_conv_sf2ud_chop, int_hexagon_F2_conv_sf2ud_chop>; +def : T_F_pat <F2_conv_sf2d_chop, int_hexagon_F2_conv_sf2d_chop>; +def : T_D_pat <F2_conv_df2uw_chop, int_hexagon_F2_conv_df2uw_chop>; +def : T_D_pat <F2_conv_df2w_chop, int_hexagon_F2_conv_df2w_chop>; +def : T_D_pat <F2_conv_df2ud_chop, int_hexagon_F2_conv_df2ud_chop>; +def : T_D_pat <F2_conv_df2d_chop, int_hexagon_F2_conv_df2d_chop>; diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp index 3f5d6d84a108..535d1f91b493 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -15,7 +15,6 @@ #include "Hexagon.h" #include "HexagonAsmPrinter.h" #include "HexagonMachineFunctionInfo.h" -#include "MCTargetDesc/HexagonMCInst.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Mangler.h" @@ -35,11 +34,11 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol, ME = MCBinaryExpr::CreateAdd(ME, MCConstantExpr::Create(MO.getOffset(), MC), MC); - return (MCOperand::CreateExpr(ME)); + return (MCOperand::createExpr(ME)); } // Create an MCInst from a MachineInstr -void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI, +void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI, HexagonAsmPrinter& AP) { MCI.setOpcode(MI->getOpcode()); @@ -54,20 +53,20 @@ void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI, case MachineOperand::MO_Register: // Ignore all implicit register operands. if (MO.isImplicit()) continue; - MCO = MCOperand::CreateReg(MO.getReg()); + MCO = MCOperand::createReg(MO.getReg()); break; case MachineOperand::MO_FPImmediate: { APFloat Val = MO.getFPImm()->getValueAPF(); // FP immediates are used only when setting GPRs, so they may be dealt // with like regular immediates from this point on. - MCO = MCOperand::CreateImm(*Val.bitcastToAPInt().getRawData()); + MCO = MCOperand::createImm(*Val.bitcastToAPInt().getRawData()); break; } case MachineOperand::MO_Immediate: - MCO = MCOperand::CreateImm(MO.getImm()); + MCO = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCO = MCOperand::CreateExpr + MCO = MCOperand::createExpr (MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), AP.OutContext)); break; diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h index cb18df6ed198..76723586c66e 100644 --- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h +++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -27,6 +27,7 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo { // returning the value of the returned struct in a register. This field // holds the virtual register into which the sret argument is passed. 
unsigned SRetReturnReg; + unsigned StackAlignBaseReg; std::vector<MachineInstr*> AllocaAdjustInsts; int VarArgsFrameIndex; bool HasClobberLR; @@ -35,10 +36,11 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo { virtual void anchor(); public: - HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0), - HasEHReturn(false) {} + HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseReg(0), + HasClobberLR(0), HasEHReturn(false) {} HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), + StackAlignBaseReg(0), HasClobberLR(0), HasEHReturn(false) {} @@ -74,6 +76,9 @@ public: bool hasEHReturn() const { return HasEHReturn; }; void setHasEHReturn(bool H = true) { HasEHReturn = H; }; + + void setStackAlignBaseVReg(unsigned R) { StackAlignBaseReg = R; } + unsigned getStackAlignBaseVReg() const { return StackAlignBaseReg; } }; } // End llvm namespace diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 97c626fdf7af..35f732cd6207 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -205,20 +205,17 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or // are disabled, then these HazardRecs will be disabled. const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); + const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); delete Top.HazardRec; delete Bot.HazardRec; - Top.HazardRec = - TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer( - Itin, DAG); - Bot.HazardRec = - TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer( - Itin, DAG); + Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); + Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); delete Top.ResourceModel; delete Bot.ResourceModel; - Top.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel()); - Bot.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel()); + Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); + Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) && "-misched-topdown incompatible with -misched-bottomup"); diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h index 1e023c32bb8c..60343442e327 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -54,11 +54,9 @@ class VLIWResourceModel { unsigned TotalPackets; public: -VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) : - SchedModel(SM), TotalPackets(0) { - ResourcesModel = - TM.getSubtargetImpl()->getInstrInfo()->CreateTargetScheduleState( - *TM.getSubtargetImpl()); + VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM) + : SchedModel(SM), TotalPackets(0) { + ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI); // This hard requirement could be relaxed, // but for now do not let it proceed. 
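// The scheduler changes above follow the same modernization seen throughout
// this import: query the per-function subtarget instead of going through
// TargetMachine::getSubtargetImpl(). A minimal sketch of the idiom, taken
// from the pattern in the hunk above (variable names illustrative):
//
//   const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
//   const TargetInstrInfo *TII = STI.getInstrInfo();
//   Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
//
// This keeps passes correct once subtarget features can differ per function.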
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 7edba92e7e0d..81af4db912cc 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -199,10 +200,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA, // of registers by individual passes in the backend. At this time, // we don't know the scope of usage and definitions of these // instructions. - if (MII->getOpcode() == Hexagon::TFR_condset_ii || - MII->getOpcode() == Hexagon::TFR_condset_ri || - MII->getOpcode() == Hexagon::TFR_condset_ir || - MII->getOpcode() == Hexagon::LDriw_pred || + if (MII->getOpcode() == Hexagon::LDriw_pred || MII->getOpcode() == Hexagon::STriw_pred) return false; } @@ -299,48 +297,48 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, switch (MI->getOpcode()) { case Hexagon::C2_cmpeq: - return taken ? Hexagon::CMPEQrr_t_Jumpnv_t_V4 - : Hexagon::CMPEQrr_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpeq_t_jumpnv_t + : Hexagon::J4_cmpeq_t_jumpnv_nt; case Hexagon::C2_cmpeqi: { if (reg >= 0) - return taken ? Hexagon::CMPEQri_t_Jumpnv_t_V4 - : Hexagon::CMPEQri_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpeqi_t_jumpnv_t + : Hexagon::J4_cmpeqi_t_jumpnv_nt; else - return taken ? Hexagon::CMPEQn1_t_Jumpnv_t_V4 - : Hexagon::CMPEQn1_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpeqn1_t_jumpnv_t + : Hexagon::J4_cmpeqn1_t_jumpnv_nt; } case Hexagon::C2_cmpgt: { if (secondRegNewified) - return taken ? Hexagon::CMPLTrr_t_Jumpnv_t_V4 - : Hexagon::CMPLTrr_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmplt_t_jumpnv_t + : Hexagon::J4_cmplt_t_jumpnv_nt; else - return taken ? Hexagon::CMPGTrr_t_Jumpnv_t_V4 - : Hexagon::CMPGTrr_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpgt_t_jumpnv_t + : Hexagon::J4_cmpgt_t_jumpnv_nt; } case Hexagon::C2_cmpgti: { if (reg >= 0) - return taken ? Hexagon::CMPGTri_t_Jumpnv_t_V4 - : Hexagon::CMPGTri_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpgti_t_jumpnv_t + : Hexagon::J4_cmpgti_t_jumpnv_nt; else - return taken ? Hexagon::CMPGTn1_t_Jumpnv_t_V4 - : Hexagon::CMPGTn1_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpgtn1_t_jumpnv_t + : Hexagon::J4_cmpgtn1_t_jumpnv_nt; } case Hexagon::C2_cmpgtu: { if (secondRegNewified) - return taken ? Hexagon::CMPLTUrr_t_Jumpnv_t_V4 - : Hexagon::CMPLTUrr_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpltu_t_jumpnv_t + : Hexagon::J4_cmpltu_t_jumpnv_nt; else - return taken ? Hexagon::CMPGTUrr_t_Jumpnv_t_V4 - : Hexagon::CMPGTUrr_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpgtu_t_jumpnv_t + : Hexagon::J4_cmpgtu_t_jumpnv_nt; } case Hexagon::C2_cmpgtui: - return taken ? Hexagon::CMPGTUri_t_Jumpnv_t_V4 - : Hexagon::CMPGTUri_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t + : Hexagon::J4_cmpgtui_t_jumpnv_nt; default: llvm_unreachable("Could not find matching New Value Jump instruction."); @@ -355,19 +353,15 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { << "********** Function: " << MF.getName() << "\n"); -#if 0 - // for now disable this, if we move NewValueJump before register - // allocation we need this information. 
-  LiveVariables &LVs = getAnalysis<LiveVariables>();
-#endif
+  // If we move NewValueJump before register allocation we'll need live variable
+  // analysis here too.
 
   QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
   QRI = static_cast<const HexagonRegisterInfo *>(
       MF.getSubtarget().getRegisterInfo());
   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
 
-  if (!QRI->Subtarget.hasV4TOps() ||
-      DisableNewValueJumps) {
+  if (DisableNewValueJumps) {
     return false;
   }
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
index 5a6de0ae2746..b7f364ef0751 100644
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -66,162 +66,131 @@ def nOneImm : Operand<i32>;
 // Immediate predicates
 //
 def s32ImmPred : PatLeaf<(i32 imm), [{
-  // s32ImmPred predicate - True if the immediate fits in a 32-bit sign extended
-  // field.
   int64_t v = (int64_t)N->getSExtValue();
   return isInt<32>(v);
 }]>;
 
-def s32_24ImmPred : PatLeaf<(i32 imm), [{
-  // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
-  // extended field that is a multiple of 0x1000000.
+def s32_0ImmPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<32>(v);
+}]>;
+
+def s31_1ImmPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<31,1>(v);
+}]>;
+
+def s30_2ImmPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<30,2>(v);
+}]>;
+
+def s29_3ImmPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<29,3>(v);
+}]>;
+
+def s22_10ImmPred : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<22,10>(v);
+}]>;
+
+def s8_24ImmPred : PatLeaf<(i32 imm), [{
   int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedInt<32,24>(v);
+  return isShiftedInt<8,24>(v);
 }]>;
 
-def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
-  // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
-  // extended field that is a multiple of 0x10000.
+def s16_16ImmPred : PatLeaf<(i32 imm), [{
   int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedInt<24,16>(v);
+  return isShiftedInt<16,16>(v);
 }]>;
 
 def s26_6ImmPred : PatLeaf<(i32 imm), [{
-  // s26_6ImmPred predicate - True if the immediate fits in a 32-bit
-  // sign extended field.
   int64_t v = (int64_t)N->getSExtValue();
   return isShiftedInt<26,6>(v);
 }]>;
-
 def s16ImmPred : PatLeaf<(i32 imm), [{
-  // s16ImmPred predicate - True if the immediate fits in a 16-bit sign extended
-  // field.
   int64_t v = (int64_t)N->getSExtValue();
   return isInt<16>(v);
 }]>;
-
 def s13ImmPred : PatLeaf<(i32 imm), [{
-  // s13ImmPred predicate - True if the immediate fits in a 13-bit sign extended
-  // field.
   int64_t v = (int64_t)N->getSExtValue();
   return isInt<13>(v);
 }]>;
-
 def s12ImmPred : PatLeaf<(i32 imm), [{
-  // s12ImmPred predicate - True if the immediate fits in a 12-bit
-  // sign extended field.
   int64_t v = (int64_t)N->getSExtValue();
   return isInt<12>(v);
 }]>;
 
 def s11_0ImmPred : PatLeaf<(i32 imm), [{
-  // s11_0ImmPred predicate - True if the immediate fits in a 11-bit
-  // sign extended field.
   int64_t v = (int64_t)N->getSExtValue();
   return isInt<11>(v);
 }]>;
-
 def s11_1ImmPred : PatLeaf<(i32 imm), [{
-  // s11_1ImmPred predicate - True if the immediate fits in a 12-bit
-  // sign extended field and is a multiple of 2.
int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<11,1>(v); }]>; - def s11_2ImmPred : PatLeaf<(i32 imm), [{ - // s11_2ImmPred predicate - True if the immediate fits in a 13-bit - // sign extended field and is a multiple of 4. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<11,2>(v); }]>; - def s11_3ImmPred : PatLeaf<(i32 imm), [{ - // s11_3ImmPred predicate - True if the immediate fits in a 14-bit - // sign extended field and is a multiple of 8. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<11,3>(v); }]>; - def s10ImmPred : PatLeaf<(i32 imm), [{ - // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<10>(v); }]>; - def s9ImmPred : PatLeaf<(i32 imm), [{ - // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<9>(v); }]>; def m9ImmPred : PatLeaf<(i32 imm), [{ - // m9ImmPred predicate - True if the immediate fits in a 9-bit magnitude - // field. The range of m9 is -255 to 255. int64_t v = (int64_t)N->getSExtValue(); return isInt<9>(v) && (v != -256); }]>; def s8ImmPred : PatLeaf<(i32 imm), [{ - // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<8>(v); }]>; - def s8Imm64Pred : PatLeaf<(i64 imm), [{ - // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<8>(v); }]>; - def s6ImmPred : PatLeaf<(i32 imm), [{ - // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<6>(v); }]>; - def s4_0ImmPred : PatLeaf<(i32 imm), [{ - // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<4>(v); }]>; - def s4_1ImmPred : PatLeaf<(i32 imm), [{ - // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended - // field of 2. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<4,1>(v); }]>; - def s4_2ImmPred : PatLeaf<(i32 imm), [{ - // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended - // field that is a multiple of 4. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<4,2>(v); }]>; - def s4_3ImmPred : PatLeaf<(i32 imm), [{ - // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended - // field that is a multiple of 8. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<4,3>(v); }]>; @@ -233,50 +202,61 @@ def u64ImmPred : PatLeaf<(i64 imm), [{ }]>; def u32ImmPred : PatLeaf<(i32 imm), [{ - // u32ImmPred predicate - True if the immediate fits in a 32-bit field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<32>(v); }]>; +def u32_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<32>(v); +}]>; + +def u31_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<31,1>(v); +}]>; + +def u30_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<30,2>(v); +}]>; + +def u29_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<29,3>(v); +}]>; + def u26_6ImmPred : PatLeaf<(i32 imm), [{ - // u26_6ImmPred - True if the immediate fits in a 32-bit field and - // is a multiple of 64. 
int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<26,6>(v); }]>; def u16ImmPred : PatLeaf<(i32 imm), [{ - // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<16>(v); }]>; def u16_s8ImmPred : PatLeaf<(i32 imm), [{ - // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit sign - // extended s8 field. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<16,8>(v); }]>; +def u16_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<16>(v); +}]>; + def u11_3ImmPred : PatLeaf<(i32 imm), [{ - // True if the immediate fits in a 14-bit unsigned field, and the lowest - // three bits are 0. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<11,3>(v); }]>; def u9ImmPred : PatLeaf<(i32 imm), [{ - // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<9>(v); }]>; - def u8ImmPred : PatLeaf<(i32 imm), [{ - // u8ImmPred predicate - True if the immediate fits in a 8-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<8>(v); }]>; @@ -288,75 +268,56 @@ def u7StrictPosImmPred : ImmLeaf<i32, [{ }]>; def u7ImmPred : PatLeaf<(i32 imm), [{ - // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<7>(v); }]>; - def u6ImmPred : PatLeaf<(i32 imm), [{ - // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<6>(v); }]>; def u6_0ImmPred : PatLeaf<(i32 imm), [{ - // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned - // field. Same as u6ImmPred. int64_t v = (int64_t)N->getSExtValue(); return isUInt<6>(v); }]>; def u6_1ImmPred : PatLeaf<(i32 imm), [{ - // u6_1ImmPred predicate - True if the immediate fits in a 7-bit unsigned - // field that is 1 bit alinged - multiple of 2. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<6,1>(v); }]>; def u6_2ImmPred : PatLeaf<(i32 imm), [{ - // u6_2ImmPred predicate - True if the immediate fits in a 8-bit unsigned - // field that is 2 bits alinged - multiple of 4. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<6,2>(v); }]>; def u6_3ImmPred : PatLeaf<(i32 imm), [{ - // u6_3ImmPred predicate - True if the immediate fits in a 9-bit unsigned - // field that is 3 bits alinged - multiple of 8. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<6,3>(v); }]>; def u5ImmPred : PatLeaf<(i32 imm), [{ - // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<5>(v); }]>; +def u4ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<4>(v); +}]>; def u3ImmPred : PatLeaf<(i32 imm), [{ - // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<3>(v); }]>; - def u2ImmPred : PatLeaf<(i32 imm), [{ - // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<2>(v); }]>; - def u1ImmPred : PatLeaf<(i1 imm), [{ - // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned - // field. 
int64_t v = (int64_t)N->getSExtValue(); return isUInt<1>(v); }]>; @@ -499,356 +460,20 @@ let PrintMethod = "printExtOperand" in { def u6_3Ext : Operand<i32>; } -let PrintMethod = "printImmOperand" in -def u0AlwaysExt : Operand<i32>; - -// Predicates for constant extendable operands -def s16ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 16-bit sign extended field. - return isInt<16>(v); - else { - if (isInt<16>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); - } -}]>; - -def s10ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 10-bit sign extended field. - return isInt<10>(v); - else { - if (isInt<10>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); - } -}]>; - -def s9ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 9-bit sign extended field. - return isInt<9>(v); - else { - if (isInt<9>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isInt<32>(v); - } -}]>; - -def s8ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 8-bit sign extended field. - return isInt<8>(v); - else { - if (isInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); - } -}]>; - -def s8_16ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate fits in a 8-bit sign extended field. - return isInt<8>(v); - else { - if (isInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can't fit in a 16-bit signed field. This is required to avoid - // unnecessary constant extenders. - return isConstExtProfitable(Node) && !isInt<16>(v); - } -}]>; - -def s6ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 6-bit sign extended field. - return isInt<6>(v); - else { - if (isInt<6>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isInt<32>(v); - } -}]>; - -def s6_16ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate fits in a 6-bit sign extended field. - return isInt<6>(v); - else { - if (isInt<6>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can't fit in a 16-bit signed field. This is required to avoid - // unnecessary constant extenders. 
@@ -499,356 +460,20 @@ let PrintMethod = "printExtOperand" in {
   def u6_3Ext : Operand<i32>;
 }
 
-let PrintMethod = "printImmOperand" in
-def u0AlwaysExt : Operand<i32>;
-
-// Predicates for constant extendable operands
-def s16ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 16-bit sign extended field.
-    return isInt<16>(v);
-  else {
-    if (isInt<16>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit signed field.
-    return isConstExtProfitable(Node) && isInt<32>(v);
-  }
-}]>;
-
-def s10ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 10-bit sign extended field.
-    return isInt<10>(v);
-  else {
-    if (isInt<10>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit signed field.
-    return isConstExtProfitable(Node) && isInt<32>(v);
-  }
-}]>;
-
-def s9ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 9-bit sign extended field.
-    return isInt<9>(v);
-  else {
-    if (isInt<9>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit unsigned field.
-    return isConstExtProfitable(Node) && isInt<32>(v);
-  }
-}]>;
-
-def s8ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 8-bit sign extended field.
-    return isInt<8>(v);
-  else {
-    if (isInt<8>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit signed field.
-    return isConstExtProfitable(Node) && isInt<32>(v);
-  }
-}]>;
-
-def s8_16ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate fits in a 8-bit sign extended field.
-    return isInt<8>(v);
-  else {
-    if (isInt<8>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can't fit in a 16-bit signed field. This is required to avoid
-    // unnecessary constant extenders.
-    return isConstExtProfitable(Node) && !isInt<16>(v);
-  }
-}]>;
-
-def s6ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 6-bit sign extended field.
-    return isInt<6>(v);
-  else {
-    if (isInt<6>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit unsigned field.
-    return isConstExtProfitable(Node) && isInt<32>(v);
-  }
-}]>;
-
-def s6_16ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate fits in a 6-bit sign extended field.
-    return isInt<6>(v);
-  else {
-    if (isInt<6>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can't fit in a 16-bit signed field. This is required to avoid
-    // unnecessary constant extenders.
-    return isConstExtProfitable(Node) && !isInt<16>(v);
-  }
-}]>;
-
-def s6_10ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 6-bit sign extended field.
-    return isInt<6>(v);
-  else {
-    if (isInt<6>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can't fit in a 10-bit signed field. This is required to avoid
-    // unnecessary constant extenders.
-    return isConstExtProfitable(Node) && !isInt<10>(v);
-  }
-}]>;
-
-def s11_0ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 11-bit sign extended field.
-    return isShiftedInt<11,0>(v);
-  else {
-    if (isInt<11>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit signed field.
-    return isConstExtProfitable(Node) && isInt<32>(v);
-  }
-}]>;
-
-def s11_1ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 12-bit sign extended field and
-    // is 2 byte aligned.
-    return isShiftedInt<11,1>(v);
-  else {
-    if (isInt<12>(v))
-      return isShiftedInt<11,1>(v);
-
-    // Return true if extending this immediate is profitable and the low 1 bit
-    // is zero (2-byte aligned).
-    return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0);
-  }
-}]>;
-
-def s11_2ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 13-bit sign extended field and
-    // is 4-byte aligned.
-    return isShiftedInt<11,2>(v);
-  else {
-    if (isInt<13>(v))
-      return isShiftedInt<11,2>(v);
-
-    // Return true if extending this immediate is profitable and the low 2-bits
-    // are zero (4-byte aligned).
-    return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0);
-  }
-}]>;
-
-def s11_3ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 14-bit sign extended field and
-    // is 8-byte aligned.
-    return isShiftedInt<11,3>(v);
-  else {
-    if (isInt<14>(v))
-      return isShiftedInt<11,3>(v);
-
-    // Return true if extending this immediate is profitable and the low 3-bits
-    // are zero (8-byte aligned).
-    return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0);
-  }
-}]>;
-
-def u0AlwaysExtPred : PatLeaf<(i32 imm), [{
-  // Predicate for an unsigned 32-bit value that always needs to be extended.
-  if (Subtarget.hasV4TOps()) {
-    if (isConstExtProfitable(Node)) {
-      int64_t v = (int64_t)N->getSExtValue();
-      return isUInt<32>(v);
-    }
-  }
-  return false;
-}]>;
-
-def u6ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 6-bit unsigned field.
-    return isUInt<6>(v);
-  else {
-    if (isUInt<6>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit unsigned field.
-    return isConstExtProfitable(Node) && isUInt<32>(v);
-  }
-}]>;
-
-def u7ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 7-bit unsigned field.
-    return isUInt<7>(v);
-  else {
-    if (isUInt<7>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit unsigned field.
-    return isConstExtProfitable(Node) && isUInt<32>(v);
-  }
-}]>;
-
-def u8ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 8-bit unsigned field.
-    return isUInt<8>(v);
-  else {
-    if (isUInt<8>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit unsigned field.
-    return isConstExtProfitable(Node) && isUInt<32>(v);
-  }
-}]>;
-
-def u9ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 9-bit unsigned field.
-    return isUInt<9>(v);
-  else {
-    if (isUInt<9>(v))
-      return true;
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit unsigned field.
-    return isConstExtProfitable(Node) && isUInt<32>(v);
-  }
-}]>;
-
-def u6_1ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 7-bit unsigned field and
-    // is 2-byte aligned.
-    return isShiftedUInt<6,1>(v);
-  else {
-    if (isUInt<7>(v))
-      return isShiftedUInt<6,1>(v);
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit unsigned field.
-    return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0);
-  }
-}]>;
-
-def u6_2ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 8-bit unsigned field and
-    // is 4-byte aligned.
-    return isShiftedUInt<6,2>(v);
-  else {
-    if (isUInt<8>(v))
-      return isShiftedUInt<6,2>(v);
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit unsigned field.
-    return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0);
-  }
-}]>;
-
-def u6_3ExtPred : PatLeaf<(i32 imm), [{
-  int64_t v = (int64_t)N->getSExtValue();
-  if (!Subtarget.hasV4TOps())
-    // Return true if the immediate can fit in a 9-bit unsigned field and
-    // is 8-byte aligned.
-    return isShiftedUInt<6,3>(v);
-  else {
-    if (isUInt<9>(v))
-      return isShiftedUInt<6,3>(v);
-
-    // Return true if extending this immediate is profitable and the value
-    // can fit in a 32-bit unsigned field.
-    return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0);
-  }
-}]>;
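The deleted *ExtPred leaves all followed one template: accept a value that fits the native N-bit field, otherwise accept it only when spending a constant extender is profitable and the value fits the extender's 32-bit range. A hedged C++ sketch of that shape (fitsSigned and extendablePredicate are illustrative names; 'profitable' stands in for the isConstExtProfitable(Node) callback used in the TableGen code):

    #include <cstdint>

    // Mirrors llvm::isInt<N>: v fits in an N-bit signed field.
    template <unsigned N> bool fitsSigned(int64_t v) {
      return v >= -(INT64_C(1) << (N - 1)) && v < (INT64_C(1) << (N - 1));
    }

    // Common shape of the removed s8ExtPred/s10ExtPred/s16ExtPred leaves.
    template <unsigned N>
    bool extendablePredicate(int64_t v, bool hasV4Ops, bool profitable) {
      if (!hasV4Ops)
        return fitsSigned<N>(v);              // only the native field exists
      if (fitsSigned<N>(v))
        return true;                          // no extender needed
      return profitable && fitsSigned<32>(v); // pay for a constant extender
    }

The *_16 variants (s8_16ExtPred, s6_16ExtPred) additionally rejected values that already fit 16 bits, so a cheaper 16-bit form would be chosen instead of an extender.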
-
 // This complex pattern exists only to create a machine instruction operand
 // of type "frame index". There doesn't seem to be a way to do that directly
 // in the patterns.
 def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>;
 
-// Addressing modes.
-
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
-def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
-def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
-def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
-def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
-def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
-def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
-def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
+// These complex patterns are not strictly necessary, since global address
+// folding will happen during DAG combining. For distinguishing between GA
+// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used.
+def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>;
+def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>;
 
 // Address operands.
-def MEMrr : Operand<i32> {
-  let PrintMethod = "printMEMrrOperand";
-  let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri : Operand<i32> {
-  let PrintMethod = "printMEMriOperand";
-  let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri_s11_2 : Operand<i32>,
-  ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
-  let PrintMethod = "printMEMriOperand";
-  let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
-def FrameIndex : Operand<i32> {
-  let PrintMethod = "printFrameIndexOperand";
-  let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
 let PrintMethod = "printGlobalOperand" in {
   def globaladdress : Operand<i32>;
   def globaladdressExt : Operand<i32>;
@@ -858,7 +483,9 @@ let PrintMethod = "printJumpTable" in
 def jumptablebase : Operand<i32>;
 
 def brtarget : Operand<OtherVT>;
-def brtargetExt : Operand<OtherVT>;
+def brtargetExt : Operand<OtherVT> {
+  let PrintMethod = "printExtBrtarget";
+}
 def calltarget : Operand<i32>;
 
 def bblabel : Operand<i32>;
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index e9b2ef6b3911..503bfdb6b3eb 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -112,7 +112,7 @@ INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole",
 bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
   QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
-  QRI = MF.getTarget().getSubtarget<HexagonSubtarget>().getRegisterInfo();
+  QRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
   MRI = &MF.getRegInfo();
 
   DenseMap<unsigned, unsigned> PeepholeMap;
@@ -271,15 +271,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
         switch (Op) {
           case Hexagon::C2_mux:
           case Hexagon::C2_muxii:
-          case Hexagon::TFR_condset_ii:
             NewOp = Op;
             break;
-          case Hexagon::TFR_condset_ri:
-            NewOp = Hexagon::TFR_condset_ir;
-            break;
-          case Hexagon::TFR_condset_ir:
-            NewOp = Hexagon::TFR_condset_ri;
-            break;
           case Hexagon::C2_muxri:
            NewOp = Hexagon::C2_muxir;
            break;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index a64c9df9a047..8f255a08f534 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -30,36 +30,58 @@
 #include "llvm/IR/Type.h"
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 
 using namespace llvm;
 
+HexagonRegisterInfo::HexagonRegisterInfo()
+    : HexagonGenRegisterInfo(Hexagon::R31) {}
 
-HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st)
-  : HexagonGenRegisterInfo(Hexagon::R31),
-    Subtarget(st) {
+
+bool HexagonRegisterInfo::isEHReturnCalleeSaveReg(unsigned R) const {
+  return R == Hexagon::R0 || R == Hexagon::R1 || R == Hexagon::R2 ||
+         R == Hexagon::R3 || R == Hexagon::D0 || R == Hexagon::D1;
+}
+
+bool HexagonRegisterInfo::isCalleeSaveReg(unsigned Reg) const {
+  return Hexagon::R16 <= Reg && Reg <= Hexagon::R27;
 }
 
+
 const MCPhysReg *
-HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  static const MCPhysReg CalleeSavedRegsV2[] = {
-    Hexagon::R24,   Hexagon::R25,   Hexagon::R26,   Hexagon::R27, 0
+HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF) const {
+  static const MCPhysReg CallerSavedRegsV4[] = {
+    Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+    Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
+    Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
+    Hexagon::R15, 0
   };
+
+  auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
+  switch (HST.getHexagonArchVersion()) {
+  case HexagonSubtarget::V4:
+  case HexagonSubtarget::V5:
+    return CallerSavedRegsV4;
+  }
+  llvm_unreachable(
+    "Callee saved registers requested for unknown architecture version");
+}
+
+
+const MCPhysReg *
+HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   static const MCPhysReg CalleeSavedRegsV3[] = {
     Hexagon::R16,   Hexagon::R17,   Hexagon::R18,   Hexagon::R19,
     Hexagon::R20,   Hexagon::R21,   Hexagon::R22,   Hexagon::R23,
     Hexagon::R24,   Hexagon::R25,   Hexagon::R26,   Hexagon::R27, 0
   };
 
-  switch(Subtarget.getHexagonArchVersion()) {
-  case HexagonSubtarget::V1:
-    break;
-  case HexagonSubtarget::V2:
-    return CalleeSavedRegsV2;
-  case HexagonSubtarget::V3:
+  switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
   case HexagonSubtarget::V4:
   case HexagonSubtarget::V5:
     return CalleeSavedRegsV3;
@@ -86,212 +108,153 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
 }
 
 
-const TargetRegisterClass* const*
-HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
-  static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = {
-    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
-    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
-  };
-  static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = {
-    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
-    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
-    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
-    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
-    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
-    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
-  };
-
-  switch(Subtarget.getHexagonArchVersion()) {
-  case HexagonSubtarget::V1:
-    break;
-  case HexagonSubtarget::V2:
-    return CalleeSavedRegClassesV2;
-  case HexagonSubtarget::V3:
-  case HexagonSubtarget::V4:
-  case HexagonSubtarget::V5:
-    return CalleeSavedRegClassesV3;
-  }
-  llvm_unreachable("Callee saved register classes requested for unknown "
-                   "architecture version");
-}
-
 void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                              int SPAdj, unsigned FIOperandNum,
+                                              int SPAdj, unsigned FIOp,
                                               RegScavenger *RS) const {
   //
   // Hexagon_TODO: Do we need to enforce this for Hexagon?
   assert(SPAdj == 0 && "Unexpected");
 
   MachineInstr &MI = *II;
-  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
-  // Addressable stack objects are accessed using neg. offsets from %fp.
-  MachineFunction &MF = *MI.getParent()->getParent();
-  const HexagonInstrInfo &TII =
-      *static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
-  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+  MachineBasicBlock &MB = *MI.getParent();
+  MachineFunction &MF = *MB.getParent();
   MachineFrameInfo &MFI = *MF.getFrameInfo();
+  auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+  auto &HII = *HST.getInstrInfo();
+  auto &HFI = *HST.getFrameLowering();
 
-  unsigned FrameReg = getFrameRegister(MF);
-  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-  if (!TFI->hasFP(MF)) {
+  int FI = MI.getOperand(FIOp).getIndex();
+  int Offset = MFI.getObjectOffset(FI) + MI.getOperand(FIOp+1).getImm();
+  bool HasAlloca = MFI.hasVarSizedObjects();
+  bool HasAlign = needsStackRealignment(MF);
+
+  // XXX: Fixed objects cannot be accessed through SP if there are aligned
+  // objects in the local frame, or if there are dynamically allocated objects.
+  // In such cases, there has to be FP available.
+  if (!HFI.hasFP(MF)) {
+    assert(!HasAlloca && !HasAlign && "This function must have frame pointer");
    // We will not reserve space on the stack for the lr and fp registers.
-    Offset -= 2 * Hexagon_WordSize;
+    Offset -= 8;
  }
 
-  const unsigned FrameSize = MFI.getStackSize();
+  unsigned SP = getStackRegister(), FP = getFrameRegister();
+  unsigned AP = 0;
+  if (MachineInstr *AI = HFI.getAlignaInstr(MF))
+    AP = AI->getOperand(0).getReg();
+  unsigned FrameSize = MFI.getStackSize();
+
+  // Special handling of dbg_value instructions and INLINEASM.
+  if (MI.isDebugValue() || MI.isInlineAsm()) {
+    MI.getOperand(FIOp).ChangeToRegister(SP, false /*isDef*/);
+    MI.getOperand(FIOp+1).ChangeToImmediate(Offset+FrameSize);
+    return;
+  }
 
-  if (!MFI.hasVarSizedObjects() &&
-      TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
-      !TII.isSpillPredRegOp(&MI)) {
-    // Replace frame index with a stack pointer reference.
-    MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), false,
-                                                 false, true);
-    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(FrameSize+Offset);
+  bool UseFP = false, UseAP = false;  // Default: use SP.
+  if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
+    UseFP = HasAlloca || HasAlign;
  } else {
-    // Replace frame index with a frame pointer reference.
-    if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
-
-      // If the offset overflows, then correct it.
-      //
-      // For loads, we do not need a reserved register
-      // r0 = memw(r30 + #10000) to:
-      //
-      // r0 = add(r30, #10000)
-      // r0 = memw(r0)
-      if ( (MI.getOpcode() == Hexagon::L2_loadri_io)  ||
-           (MI.getOpcode() == Hexagon::L2_loadrd_io)  ||
-           (MI.getOpcode() == Hexagon::L2_loadrh_io) ||
-           (MI.getOpcode() == Hexagon::L2_loadruh_io) ||
-           (MI.getOpcode() == Hexagon::L2_loadrb_io) ||
-           (MI.getOpcode() == Hexagon::L2_loadrub_io) ||
-           (MI.getOpcode() == Hexagon::LDriw_f) ||
-           (MI.getOpcode() == Hexagon::LDrid_f)) {
-        unsigned dstReg = (MI.getOpcode() == Hexagon::L2_loadrd_io) ?
-          getSubReg(MI.getOperand(0).getReg(), Hexagon::subreg_loreg) :
-          MI.getOperand(0).getReg();
-
-        // Check if offset can fit in addi.
-        if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
-          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                  TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
-          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                  TII.get(Hexagon::A2_add),
-                  dstReg).addReg(FrameReg).addReg(dstReg);
-        } else {
-          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                  TII.get(Hexagon::ADD_ri),
-                  dstReg).addReg(FrameReg).addImm(Offset);
-        }
-
-        MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
-        MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
-      } else if ((MI.getOpcode() == Hexagon::S2_storeri_io) ||
-                 (MI.getOpcode() == Hexagon::S2_storerd_io) ||
-                 (MI.getOpcode() == Hexagon::S2_storerh_io) ||
-                 (MI.getOpcode() == Hexagon::S2_storerb_io) ||
-                 (MI.getOpcode() == Hexagon::STrid_f) ||
-                 (MI.getOpcode() == Hexagon::STriw_f)) {
-        // For stores, we need a reserved register. Change
-        // memw(r30 + #10000) = r0 to:
-        //
-        // rs = add(r30, #10000);
-        // memw(rs) = r0
-        unsigned resReg = HEXAGON_RESERVED_REG_1;
-
-        // Check if offset can fit in addi.
-        if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
-          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                  TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
-          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                  TII.get(Hexagon::A2_add),
-                  resReg).addReg(FrameReg).addReg(resReg);
-        } else {
-          BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                  TII.get(Hexagon::ADD_ri),
-                  resReg).addReg(FrameReg).addImm(Offset);
-        }
-        MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true);
-        MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
-      } else if (TII.isMemOp(&MI)) {
-        // use the constant extender if the instruction provides it
-        // and we are V4TOps.
-        if (Subtarget.hasV4TOps()) {
-          if (TII.isConstExtended(&MI)) {
-            MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
-            MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
-            TII.immediateExtend(&MI);
-          } else {
-            llvm_unreachable("Need to implement for memops");
-          }
-        } else {
-          // Only V3 and older instructions here.
-          unsigned ResReg = HEXAGON_RESERVED_REG_1;
-          if (!MFI.hasVarSizedObjects() &&
-              TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
-            MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(),
-                                                         false, false, false);
-            MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset);
-          } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
-            BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                    TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset);
-            BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                    TII.get(Hexagon::A2_add), ResReg).addReg(FrameReg).
-              addReg(ResReg);
-            MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
-                                                         true);
-            MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
-          } else {
-            BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                    TII.get(Hexagon::ADD_ri), ResReg).addReg(FrameReg).
-              addImm(Offset);
-            MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
-                                                         true);
-            MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
-          }
-        }
-      } else {
-        unsigned dstReg = MI.getOperand(0).getReg();
-        BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
-        BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
-                TII.get(Hexagon::A2_add),
-                dstReg).addReg(FrameReg).addReg(dstReg);
-        // Can we delete MI??? r2 = add (r2, #0).
-        MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
-        MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
-      }
-    } else {
-      // If the offset is small enough to fit in the immediate field, directly
-      // encode it.
-      MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
-      MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+    if (HasAlloca) {
+      if (HasAlign)
+        UseAP = true;
+      else
+        UseFP = true;
+    }
  }
 
+  unsigned Opc = MI.getOpcode();
+  bool ValidSP = HII.isValidOffset(Opc, FrameSize+Offset);
+  bool ValidFP = HII.isValidOffset(Opc, Offset);
+
+  // Calculate the actual offset in the instruction.
+  int64_t RealOffset = Offset;
+  if (!UseFP && !UseAP)
+    RealOffset = FrameSize+Offset;
+
+  switch (Opc) {
+    case Hexagon::TFR_FIA:
+      MI.setDesc(HII.get(Hexagon::A2_addi));
+      MI.getOperand(FIOp).ChangeToImmediate(RealOffset);
+      MI.RemoveOperand(FIOp+1);
+      return;
+    case Hexagon::TFR_FI:
+      // Set up the instruction for updating below.
+      MI.setDesc(HII.get(Hexagon::A2_addi));
+      break;
+  }
+
+  unsigned BP = 0;
+  bool Valid = false;
+  if (UseFP) {
+    BP = FP;
+    Valid = ValidFP;
+  } else if (UseAP) {
+    BP = AP;
+    Valid = ValidFP;
+  } else {
+    BP = SP;
+    Valid = ValidSP;
+  }
+
+  if (Valid) {
+    MI.getOperand(FIOp).ChangeToRegister(BP, false);
+    MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset);
+    return;
+  }
+
+#ifndef NDEBUG
+  const Function *F = MF.getFunction();
+  dbgs() << "In function ";
+  if (F) dbgs() << F->getName();
+  else   dbgs() << "<?>";
+  dbgs() << ", BB#" << MB.getNumber() << "\n" << MI;
+#endif
+  llvm_unreachable("Unhandled instruction");
 }
 
+
 unsigned HexagonRegisterInfo::getRARegister() const {
   return Hexagon::R31;
 }
 
+
 unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction &MF)
                                                const {
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-  if (TFI->hasFP(MF)) {
+  if (TFI->hasFP(MF))
     return Hexagon::R30;
-  }
-
   return Hexagon::R29;
 }
 
+
 unsigned HexagonRegisterInfo::getFrameRegister() const {
   return Hexagon::R30;
 }
 
+
 unsigned HexagonRegisterInfo::getStackRegister() const {
   return Hexagon::R29;
 }
+
+
+bool
+HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+  return MF.getSubtarget().getFrameLowering()->hasFP(MF);
+}
+
+
+bool
+HexagonRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return MFI->getMaxAlignment() > 8;
+}
+
+
+unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const {
+  return Hexagon::R6;
+}
+
+
 #define GET_REGINFO_TARGET_DESC
 #include "HexagonGenRegisterInfo.inc"
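The rewritten eliminateFrameIndex condenses the old opcode-by-opcode rewriting into a UseFP/UseAP base-register decision. A compact C++ model of that choice, under the same assumptions as the code above (chooseBase is an illustrative name, not an LLVM API):

    #include <cassert>

    enum BaseReg { SP, FP, AP }; // stack, frame, and aligned-stack pointers

    // Condensed model of the UseFP/UseAP selection in eliminateFrameIndex:
    // fixed or pre-allocated objects need FP once the frame has alloca or
    // realignment; local objects need AP when both are present, FP for
    // alloca alone, and SP otherwise.
    BaseReg chooseBase(bool fixedObject, bool hasAlloca, bool hasAlign) {
      if (fixedObject)
        return (hasAlloca || hasAlign) ? FP : SP;
      if (hasAlloca)
        return hasAlign ? AP : FP;
      return SP;
    }

    int main() {
      assert(chooseBase(true, false, false) == SP);  // plain fixed object
      assert(chooseBase(true, true, false) == FP);   // alloca forces FP
      assert(chooseBase(false, true, true) == AP);   // alloca + realignment
      assert(chooseBase(false, false, true) == SP);  // realigned locals stay SP-relative
      return 0;
    }

Note that the SP-relative cases fold the frame size into the immediate (RealOffset = FrameSize + Offset), which is why offset validity is checked against FrameSize+Offset for SP but plain Offset for FP.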
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index a83b5026467a..7edefee93993 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -37,39 +37,37 @@
 #define HEXAGON_RESERVED_REG_2 Hexagon::R11
 
 namespace llvm {
-
-class HexagonSubtarget;
-class HexagonInstrInfo;
-class Type;
-
-struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
-  HexagonSubtarget &Subtarget;
-
-  HexagonRegisterInfo(HexagonSubtarget &st);
+class HexagonRegisterInfo : public HexagonGenRegisterInfo {
+public:
+  HexagonRegisterInfo();
 
   /// Code Generation virtual methods...
-  const MCPhysReg *
-  getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+  const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF)
+        const override;
 
-  const TargetRegisterClass* const*
-  getCalleeSavedRegClasses(const MachineFunction *MF = nullptr) const;
   BitVector getReservedRegs(const MachineFunction &MF) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator II,
-                           int SPAdj, unsigned FIOperandNum,
-                           RegScavenger *RS = nullptr) const override;
+  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+        unsigned FIOperandNum, RegScavenger *RS = nullptr) const override;
 
-  /// determineFrameLayout - Determine the size of the frame and maximum call
-  /// frame size.
-  void determineFrameLayout(MachineFunction &MF) const;
-
-  /// requiresRegisterScavenging - returns true since we may need scavenging for
-  /// a temporary register when generating hardware loop instructions.
+  /// Returns true since we may need scavenging for a temporary register
+  /// when generating hardware loop instructions.
   bool requiresRegisterScavenging(const MachineFunction &MF) const override {
     return true;
   }
 
+  /// Returns true. Spill code for predicate registers might need an extra
+  /// register.
+  bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
+    return true;
+  }
+
+  bool needsStackRealignment(const MachineFunction &MF) const override;
+
+  /// Returns true if the frame pointer is valid.
+  bool useFPForScavengingIndex(const MachineFunction &MF) const override;
+
   bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
     return true;
   }
@@ -79,6 +77,13 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
   unsigned getFrameRegister(const MachineFunction &MF) const override;
   unsigned getFrameRegister() const;
   unsigned getStackRegister() const;
+
+  const MCPhysReg *getCallerSavedRegs(const MachineFunction *MF) const;
+
+  unsigned getFirstCallerSavedNonParamReg() const;
+
+  bool isEHReturnCalleeSaveReg(unsigned Reg) const;
+  bool isCalleeSaveReg(unsigned Reg) const;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
index decd94722da1..edf1c251ac77 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -163,18 +163,20 @@ let Namespace = "Hexagon" in {
 
 // FIXME: the register order should be defined in terms of the preferred
 // allocation order...
 //
-def IntRegs : RegisterClass<"Hexagon", [i32,f32], 32,
+def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
                             (add (sequence "R%u", 0, 9),
                                  (sequence "R%u", 12, 28),
                                  R10, R11, R29, R30, R31)> {
 }
 
-def DoubleRegs : RegisterClass<"Hexagon", [i64,f64], 64,
+def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
                                (add (sequence "D%u", 0, 4),
                                     (sequence "D%u", 6, 13), D5, D14, D15)>;
 
-def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))>
+def PredRegs : RegisterClass<"Hexagon",
+                             [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32,
+                             (add (sequence "P%u", 0, 3))>
 {
   let Size = 32;
 }
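IntRegs and DoubleRegs now also carry short-vector types (v4i8/v2i16 in 32 bits, v8i8/v4i16/v2i32 in 64), meaning the vector lives packed inside an ordinary scalar register. A plain C++ model of the v4i8 packing, assuming Hexagon's little-endian lane order (lane 0 in the low byte):

    #include <cassert>
    #include <cstdint>

    // Four i8 lanes packed into one 32-bit general register word.
    uint32_t packV4I8(uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3) {
      return static_cast<uint32_t>(l0) | (static_cast<uint32_t>(l1) << 8) |
             (static_cast<uint32_t>(l2) << 16) | (static_cast<uint32_t>(l3) << 24);
    }

    int main() {
      assert(packV4I8(1, 2, 3, 4) == 0x04030201u);
      return 0;
    }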
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 8fdd493a75dc..4efb5f75af62 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -48,12 +48,9 @@ using namespace llvm;
 
 namespace {
 
 class HexagonSplitConst32AndConst64 : public MachineFunctionPass {
-    const HexagonTargetMachine &QTM;
-
  public:
     static char ID;
-    HexagonSplitConst32AndConst64(const HexagonTargetMachine &TM)
-        : MachineFunctionPass(ID), QTM(TM) {}
+    HexagonSplitConst32AndConst64() : MachineFunctionPass(ID) {}
 
     const char *getPassName() const override {
       return "Hexagon Split Const32s and Const64s";
@@ -68,13 +65,13 @@ char HexagonSplitConst32AndConst64::ID = 0;
 
 bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
 
   const HexagonTargetObjectFile &TLOF =
-      (const HexagonTargetObjectFile &)QTM.getSubtargetImpl()
-          ->getTargetLowering()
-          ->getObjFileLowering();
+      *static_cast<const HexagonTargetObjectFile *>(
+          Fn.getTarget().getObjFileLowering());
   if (TLOF.IsSmallDataEnabled())
     return true;
 
-  const TargetInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
+  const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
+  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
 
   // Loop over all of the basic blocks
   for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -86,7 +83,8 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
     while (MII != MIE) {
       MachineInstr *MI = MII;
       int Opc = MI->getOpcode();
-      if (Opc == Hexagon::CONST32_set) {
+      if (Opc == Hexagon::CONST32_Int_Real &&
+          MI->getOperand(1).isBlockAddress()) {
         int DestReg = MI->getOperand(0).getReg();
         MachineOperand &Symbol = MI->getOperand (1);
 
@@ -99,69 +97,53 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
         MII = MBB->erase (MI);
         continue;
       }
-      else if (Opc == Hexagon::CONST32_set_jt) {
-        int DestReg = MI->getOperand(0).getReg();
-        MachineOperand &Symbol = MI->getOperand (1);
-
-        BuildMI (*MBB, MII, MI->getDebugLoc(),
-                 TII->get(Hexagon::LO_jt), DestReg).addOperand(Symbol);
-        BuildMI (*MBB, MII, MI->getDebugLoc(),
-                 TII->get(Hexagon::HI_jt), DestReg).addOperand(Symbol);
-        // MBB->erase returns the iterator to the next instruction, which is the
-        // one we want to process next
-        MII = MBB->erase (MI);
-        continue;
-      }
-      else if (Opc == Hexagon::CONST32_Label) {
+      else if (Opc == Hexagon::CONST32_Int_Real ||
+               Opc == Hexagon::CONST32_Float_Real) {
         int DestReg = MI->getOperand(0).getReg();
-        MachineOperand &Symbol = MI->getOperand (1);
 
-        BuildMI (*MBB, MII, MI->getDebugLoc(),
-                 TII->get(Hexagon::LO_label), DestReg).addOperand(Symbol);
-        BuildMI (*MBB, MII, MI->getDebugLoc(),
-                 TII->get(Hexagon::HI_label), DestReg).addOperand(Symbol);
-        // MBB->erase returns the iterator to the next instruction, which is the
-        // one we want to process next
+        // We have to convert an FP immediate into its corresponding integer
+        // representation
+        int64_t ImmValue;
+        if (Opc == Hexagon::CONST32_Float_Real) {
+          APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF();
+          ImmValue = *Val.bitcastToAPInt().getRawData();
+        }
+        else
+          ImmValue = MI->getOperand(1).getImm();
+
+        BuildMI(*MBB, MII, MI->getDebugLoc(),
+                TII->get(Hexagon::A2_tfrsi), DestReg).addImm(ImmValue);
         MII = MBB->erase (MI);
         continue;
       }
-      else if (Opc == Hexagon::CONST32_Int_Real) {
+      else if (Opc == Hexagon::CONST64_Int_Real ||
+               Opc == Hexagon::CONST64_Float_Real) {
         int DestReg = MI->getOperand(0).getReg();
-        int64_t ImmValue = MI->getOperand(1).getImm ();
 
-        BuildMI (*MBB, MII, MI->getDebugLoc(),
-                 TII->get(Hexagon::LOi), DestReg).addImm(ImmValue);
-        BuildMI (*MBB, MII, MI->getDebugLoc(),
-                 TII->get(Hexagon::HIi), DestReg).addImm(ImmValue);
-        MII = MBB->erase (MI);
-        continue;
-      }
-      else if (Opc == Hexagon::CONST64_Int_Real) {
-        int DestReg = MI->getOperand(0).getReg();
-        int64_t ImmValue = MI->getOperand(1).getImm ();
-        unsigned DestLo = QTM.getSubtargetImpl()->getRegisterInfo()->getSubReg(
-            DestReg, Hexagon::subreg_loreg);
-        unsigned DestHi = QTM.getSubtargetImpl()->getRegisterInfo()->getSubReg(
-            DestReg, Hexagon::subreg_hireg);
+        // We have to convert an FP immediate into its corresponding integer
+        // representation
+        int64_t ImmValue;
+        if (Opc == Hexagon::CONST64_Float_Real) {
+          APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF();
+          ImmValue = *Val.bitcastToAPInt().getRawData();
+        }
+        else
+          ImmValue = MI->getOperand(1).getImm();
+
+        unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg);
+        unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg);
 
         int32_t LowWord = (ImmValue & 0xFFFFFFFF);
         int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF;
 
-        // Lower Registers Lower Half
-        BuildMI (*MBB, MII, MI->getDebugLoc(),
-                 TII->get(Hexagon::LOi), DestLo).addImm(LowWord);
-        // Lower Registers Higher Half
+        BuildMI(*MBB, MII, MI->getDebugLoc(),
+                TII->get(Hexagon::A2_tfrsi), DestLo).addImm(LowWord);
         BuildMI (*MBB, MII, MI->getDebugLoc(),
-                 TII->get(Hexagon::HIi), DestLo).addImm(LowWord);
-        // Higher Registers Lower Half
-        BuildMI (*MBB, MII, MI->getDebugLoc(),
-                 TII->get(Hexagon::LOi), DestHi).addImm(HighWord);
-        // Higher Registers Higher Half.
-        BuildMI (*MBB, MII, MI->getDebugLoc(),
-                 TII->get(Hexagon::HIi), DestHi).addImm(HighWord);
+                 TII->get(Hexagon::A2_tfrsi), DestHi).addImm(HighWord);
         MII = MBB->erase (MI);
         continue;
-      }
+      }
       ++MII;
     }
   }
@@ -176,6 +158,6 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
 //===----------------------------------------------------------------------===//
 
 FunctionPass *
-llvm::createHexagonSplitConst32AndConst64(const HexagonTargetMachine &TM) {
-  return new HexagonSplitConst32AndConst64(TM);
+llvm::createHexagonSplitConst32AndConst64() {
+  return new HexagonSplitConst32AndConst64();
 }
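The new CONST32_Float_Real/CONST64_Float_Real paths reinterpret the FP immediate's bit pattern as an integer via APFloat::bitcastToAPInt() and then emit ordinary A2_tfrsi transfers. The same reinterpretation in standalone C++, using memcpy in place of the APFloat API:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Bit pattern of a double as a 64-bit integer, as the pass obtains via
    // Val.bitcastToAPInt().getRawData().
    uint64_t bitsOfDouble(double d) {
      uint64_t bits;
      static_assert(sizeof bits == sizeof d, "64-bit double expected");
      std::memcpy(&bits, &d, sizeof bits);
      return bits;
    }

    int main() {
      uint64_t bits = bitsOfDouble(1.0); // IEEE-754: 0x3FF0000000000000
      assert(bits == UINT64_C(0x3FF0000000000000));
      uint32_t lo = static_cast<uint32_t>(bits);       // goes to DestLo
      uint32_t hi = static_cast<uint32_t>(bits >> 32); // goes to DestHi
      assert(lo == 0 && hi == 0x3FF00000u);
      return 0;
    }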
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
deleted file mode 100644
index a304e655f0b1..000000000000
--- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ /dev/null
@@ -1,235 +0,0 @@
-//===-- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//
-//===----------------------------------------------------------------------===//
-// This pass tries to provide opportunities for better optimization of muxes.
-// The default code generated for something like: flag = (a == b) ? 1 : 3;
-// would be:
-//
-//   {p0 = cmp.eq(r0,r1)}
-//   {r3 = mux(p0,#1,#3)}
-//
-// This requires two packets. If we use .new predicated immediate transfers,
-// then we can do this in a single packet, e.g.:
-//
-//   {p0 = cmp.eq(r0,r1)
-//    if (p0.new) r3 = #1
-//    if (!p0.new) r3 = #3}
-//
-// Note that the conditional assignments are not generated in .new form here.
-// We assume opptimisically that they will be formed later.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Hexagon.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "xfer"
-
-namespace llvm {
-  void initializeHexagonSplitTFRCondSetsPass(PassRegistry&);
-}
-
-
-namespace {
-
-class HexagonSplitTFRCondSets : public MachineFunctionPass {
-    const HexagonTargetMachine &QTM;
-    const HexagonSubtarget &QST;
-
- public:
-    static char ID;
-    HexagonSplitTFRCondSets(const HexagonTargetMachine& TM) :
-      MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
-      initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry());
-    }
-
-    const char *getPassName() const override {
-      return "Hexagon Split TFRCondSets";
-    }
-    bool runOnMachineFunction(MachineFunction &Fn) override;
-};
-
-
-char HexagonSplitTFRCondSets::ID = 0;
-
-
-bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
-
-  const TargetInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
-
-  // Loop over all of the basic blocks.
-  for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
-       MBBb != MBBe; ++MBBb) {
-    MachineBasicBlock* MBB = MBBb;
-    // Traverse the basic block.
-    for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
-         ++MII) {
-      MachineInstr *MI = MII;
-      int Opc1, Opc2;
-      switch(MI->getOpcode()) {
-        case Hexagon::TFR_condset_rr_f:
-        case Hexagon::TFR_condset_rr64_f: {
-          int DestReg = MI->getOperand(0).getReg();
-          int SrcReg1 = MI->getOperand(2).getReg();
-          int SrcReg2 = MI->getOperand(3).getReg();
-
-          if (MI->getOpcode() == Hexagon::TFR_condset_rr_f) {
-            Opc1 = Hexagon::A2_tfrt;
-            Opc2 = Hexagon::A2_tfrf;
-          }
-          else if (MI->getOpcode() == Hexagon::TFR_condset_rr64_f) {
-            Opc1 = Hexagon::A2_tfrpt;
-            Opc2 = Hexagon::A2_tfrpf;
-          }
-
-          // Minor optimization: do not emit the predicated copy if the source
-          // and the destination is the same register.
-          if (DestReg != SrcReg1) {
-            BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc1),
-                    DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
-          }
-          if (DestReg != SrcReg2) {
-            BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc2),
-                    DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
-          }
-          MII = MBB->erase(MI);
-          --MII;
-          break;
-        }
-        case Hexagon::TFR_condset_ri:
-        case Hexagon::TFR_condset_ri_f: {
-          int DestReg = MI->getOperand(0).getReg();
-          int SrcReg1 = MI->getOperand(2).getReg();
-
-          // Do not emit the predicated copy if the source and the destination
-          // is the same register.
-          if (DestReg != SrcReg1) {
-            BuildMI(*MBB, MII, MI->getDebugLoc(),
-                    TII->get(Hexagon::A2_tfrt), DestReg).
-              addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
-          }
-          if (MI->getOpcode() ==  Hexagon::TFR_condset_ri ) {
-            BuildMI(*MBB, MII, MI->getDebugLoc(),
-                    TII->get(Hexagon::C2_cmoveif), DestReg).
-              addReg(MI->getOperand(1).getReg()).
-              addImm(MI->getOperand(3).getImm());
-          } else if (MI->getOpcode() ==  Hexagon::TFR_condset_ri_f ) {
-            BuildMI(*MBB, MII, MI->getDebugLoc(),
-                    TII->get(Hexagon::TFRI_cNotPt_f), DestReg).
-              addReg(MI->getOperand(1).getReg()).
-              addFPImm(MI->getOperand(3).getFPImm());
-          }
-
-          MII = MBB->erase(MI);
-          --MII;
-          break;
-        }
-        case Hexagon::TFR_condset_ir:
-        case Hexagon::TFR_condset_ir_f: {
-          int DestReg = MI->getOperand(0).getReg();
-          int SrcReg2 = MI->getOperand(3).getReg();
-
-          if (MI->getOpcode() ==  Hexagon::TFR_condset_ir ) {
-            BuildMI(*MBB, MII, MI->getDebugLoc(),
-                    TII->get(Hexagon::C2_cmoveit), DestReg).
-              addReg(MI->getOperand(1).getReg()).
-              addImm(MI->getOperand(2).getImm());
-          } else if (MI->getOpcode() ==  Hexagon::TFR_condset_ir_f ) {
-            BuildMI(*MBB, MII, MI->getDebugLoc(),
-                    TII->get(Hexagon::TFRI_cPt_f), DestReg).
-              addReg(MI->getOperand(1).getReg()).
-              addFPImm(MI->getOperand(2).getFPImm());
-          }
-
-          // Do not emit the predicated copy if the source and
-          // the destination is the same register.
-          if (DestReg != SrcReg2) {
-            BuildMI(*MBB, MII, MI->getDebugLoc(),
-                    TII->get(Hexagon::A2_tfrf), DestReg).
-              addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
-          }
-          MII = MBB->erase(MI);
-          --MII;
-          break;
-        }
-        case Hexagon::TFR_condset_ii:
-        case Hexagon::TFR_condset_ii_f: {
-          int DestReg = MI->getOperand(0).getReg();
-          int SrcReg1 = MI->getOperand(1).getReg();
-
-          if (MI->getOpcode() ==  Hexagon::TFR_condset_ii ) {
-            int Immed1 = MI->getOperand(2).getImm();
-            int Immed2 = MI->getOperand(3).getImm();
-            BuildMI(*MBB, MII, MI->getDebugLoc(),
-                    TII->get(Hexagon::C2_cmoveit),
-                    DestReg).addReg(SrcReg1).addImm(Immed1);
-            BuildMI(*MBB, MII, MI->getDebugLoc(),
-                    TII->get(Hexagon::C2_cmoveif),
-                    DestReg).addReg(SrcReg1).addImm(Immed2);
-          } else if (MI->getOpcode() ==  Hexagon::TFR_condset_ii_f ) {
-            BuildMI(*MBB, MII, MI->getDebugLoc(),
-                    TII->get(Hexagon::TFRI_cPt_f), DestReg).
-              addReg(SrcReg1).
-              addFPImm(MI->getOperand(2).getFPImm());
-            BuildMI(*MBB, MII, MI->getDebugLoc(),
-                    TII->get(Hexagon::TFRI_cNotPt_f), DestReg).
-              addReg(SrcReg1).
-              addFPImm(MI->getOperand(3).getFPImm());
-          }
-          MII = MBB->erase(MI);
-          --MII;
-          break;
-        }
-      }
-    }
-  }
-  return true;
-}
-
-}
-
-//===----------------------------------------------------------------------===//
-//                         Public Constructor Functions
-//===----------------------------------------------------------------------===//
-
-static void initializePassOnce(PassRegistry &Registry) {
-  const char *Name = "Hexagon Split TFRCondSets";
-  PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr",
-                              &HexagonSplitTFRCondSets::ID, nullptr, false,
-                              false);
-  Registry.registerPass(*PI, true);
-}
-
-void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) {
-  CALL_ONCE_INITIALIZATION(initializePassOnce)
-}
-
-FunctionPass*
-llvm::createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM) {
-  return new HexagonSplitTFRCondSets(TM);
-}
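For reference, the pass deleted above rewrote a TFR_condset_ii pseudo into a pair of predicated immediate transfers on the same predicate, so the packetizer could later place both next to the compare, as in the header comment's mux example. A toy string-level model of that split (expandCondsetII is an illustrative name, not an API):

    #include <cassert>
    #include <string>
    #include <vector>

    // One pseudo carrying both immediates becomes two predicated transfers,
    // mirroring the BuildMI(C2_cmoveit)/BuildMI(C2_cmoveif) pair in the pass.
    std::vector<std::string> expandCondsetII(const std::string &rd,
                                             const std::string &pu,
                                             int imm1, int imm2) {
      return {"if (" + pu + ") " + rd + " = #" + std::to_string(imm1),
              "if (!" + pu + ") " + rd + " = #" + std::to_string(imm2)};
    }

    int main() {
      auto insns = expandCondsetII("r3", "p0", 1, 3); // from r3 = mux(p0,#1,#3)
      assert(insns[0] == "if (p0) r3 = #1");
      assert(insns[1] == "if (!p0) r3 = #3");
      return 0;
    }

This import retires the pass; the same expansion now appears to happen earlier in the new HexagonExpandCondsets pass added to CMakeLists and registered in HexagonTargetMachine below.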
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 657893f32fee..d61cc5418a4a 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -48,18 +48,17 @@ EnableIEEERndNear(
   cl::Hidden, cl::ZeroOrMore, cl::init(false),
   cl::desc("Generate non-chopped conversion from fp to int."));
 
+static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
+  cl::Hidden, cl::ZeroOrMore, cl::init(false),
+  cl::desc("Disable Hexagon MI Scheduling"));
+
 HexagonSubtarget &
 HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
   // If the programmer has not specified a Hexagon version, default to -mv4.
   if (CPUString.empty())
     CPUString = "hexagonv4";
 
-  if (CPUString == "hexagonv2") {
-    HexagonArchVersion = V2;
-  } else if (CPUString == "hexagonv3") {
-    EnableV3 = true;
-    HexagonArchVersion = V3;
-  } else if (CPUString == "hexagonv4") {
+  if (CPUString == "hexagonv4") {
     HexagonArchVersion = V4;
   } else if (CPUString == "hexagonv5") {
     HexagonArchVersion = V5;
@@ -73,10 +72,9 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
 
 HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
                                    const TargetMachine &TM)
-    : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()),
-      DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32"),
-      InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
-      TSInfo(DL), FrameLowering() {
+    : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
+      InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+      TSInfo(*TM.getDataLayout()), FrameLowering() {
 
   // Initialize scheduling itinerary for the specified CPU.
   InstrItins = getInstrItineraryForCPU(CPUString);
@@ -97,3 +95,9 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
 
 // Pin the vtable to this file.
 void HexagonSubtarget::anchor() {}
+
+bool HexagonSubtarget::enableMachineScheduler() const {
+  if (DisableHexagonMISched.getNumOccurrences())
+    return !DisableHexagonMISched;
+  return true;
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 34e327f7c3a1..780567bcd36b 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -39,13 +39,12 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
 
 public:
   enum HexagonArchEnum {
-    V1, V2, V3, V4, V5
+    V4, V5
   };
 
   HexagonArchEnum HexagonArchVersion;
 
 private:
   std::string CPUString;
-  const DataLayout DL; // Calculates type size & alignment.
   HexagonInstrInfo InstrInfo;
   HexagonTargetLowering TLInfo;
   HexagonSelectionDAGInfo TSInfo;
@@ -74,7 +73,6 @@ public:
   const HexagonSelectionDAGInfo *getSelectionDAGInfo() const override {
     return &TSInfo;
   }
-  const DataLayout *getDataLayout() const override { return &DL; }
 
   HexagonSubtarget &initializeSubtargetDependencies(StringRef CPU,
                                                     StringRef FS);
@@ -83,18 +81,16 @@ public:
   /// subtarget options.  Definition of function is auto generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
-  bool hasV2TOps () const { return HexagonArchVersion >= V2; }
-  bool hasV2TOpsOnly () const { return HexagonArchVersion == V2; }
-  bool hasV3TOps () const { return HexagonArchVersion >= V3; }
-  bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; }
-  bool hasV4TOps () const { return HexagonArchVersion >= V4; }
-  bool hasV4TOpsOnly () const { return HexagonArchVersion == V4; }
-  bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; }
-  bool hasV5TOps () const { return HexagonArchVersion >= V5; }
-  bool hasV5TOpsOnly () const { return HexagonArchVersion == V5; }
-  bool modeIEEERndNear () const { return ModeIEEERndNear; }
-
-  bool isSubtargetV2() const { return HexagonArchVersion == V2;}
+  bool useMemOps() const { return UseMemOps; }
+  bool hasV5TOps() const { return getHexagonArchVersion() >= V5; }
+  bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; }
+  bool modeIEEERndNear() const { return ModeIEEERndNear; }
+  bool enableMachineScheduler() const override;
+  // Always use the TargetLowering default scheduler.
+  // FIXME: This will use the vliw scheduler which is probably just hurting
+  // compiler time and will be removed eventually anyway.
+  bool enableMachineSchedDefaultSched() const override { return false; }
+
   const std::string &getCPUString () const { return CPUString; }
 
   // Threshold for small data section
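enableMachineScheduler() above distinguishes "flag never passed" from "flag set explicitly" through cl::opt's getNumOccurrences(), so -disable-hexagon-misched=false can force the scheduler on while the silent default also stays on. A minimal sketch of that tri-state pattern, with a plain struct standing in for llvm::cl::opt<bool>:

    #include <cassert>

    // Stand-in for llvm::cl::opt<bool>: remembers how often the flag
    // occurred on the command line, independent of its current value.
    struct FlagSketch {
      bool value;
      int occurrences;
      int getNumOccurrences() const { return occurrences; }
    };

    // Mirrors HexagonSubtarget::enableMachineScheduler().
    bool enableMISched(const FlagSketch &disable) {
      if (disable.getNumOccurrences())
        return !disable.value;
      return true;
    }

    int main() {
      FlagSketch untouched{false, 0};  // flag never passed: default on
      assert(enableMISched(untouched));
      FlagSketch off{true, 1};         // -disable-hexagon-misched
      assert(!enableMISched(off));
      FlagSketch on{false, 1};         // -disable-hexagon-misched=false
      assert(enableMISched(on));
      return 0;
    }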
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 52aff2787bac..06798665cb05 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -17,8 +17,8 @@
 #include "HexagonMachineScheduler.h"
 #include "HexagonTargetObjectFile.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -27,15 +27,15 @@ using namespace llvm;
 
 static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
-      cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
-
-static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
-      cl::Hidden, cl::ZeroOrMore, cl::init(false),
-      cl::desc("Disable Hexagon MI Scheduling"));
+  cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
 
 static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
-      cl::Hidden, cl::ZeroOrMore, cl::init(false),
-      cl::desc("Disable Hexagon CFG Optimization"));
+  cl::Hidden, cl::ZeroOrMore, cl::init(false),
+  cl::desc("Disable Hexagon CFG Optimization"));
+
+static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
+  cl::init(true), cl::Hidden, cl::ZeroOrMore,
+  cl::desc("Early expansion of MUX"));
 
 
 /// HexagonTargetMachineModule - Note that this is used on hosts that
@@ -59,6 +59,10 @@ static MachineSchedRegistry
 SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
                     createVLIWMachineSched);
 
+namespace llvm {
+  FunctionPass *createHexagonExpandCondsets();
+}
+
 /// HexagonTargetMachine ctor - Create an ILP32 architecture model.
 ///
@@ -69,7 +73,8 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
                                            const TargetOptions &Options,
                                            Reloc::Model RM, CodeModel::Model CM,
                                            CodeGenOpt::Level OL)
-    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+    : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i64:64-a:0-n32", TT, CPU, FS,
+                        Options, RM, CM, OL),
       TLOF(make_unique<HexagonTargetObjectFile>()),
       Subtarget(TT, CPU, FS, *this) {
     initAsmInfo();
@@ -83,14 +88,13 @@ class HexagonPassConfig : public TargetPassConfig {
 public:
   HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
     : TargetPassConfig(TM, PM) {
-    // FIXME: Rather than calling enablePass(&MachineSchedulerID) below, define
-    // HexagonSubtarget::enableMachineScheduler() { return true; }.
-    // That will bypass the SelectionDAG VLIW scheduler, which is probably just
-    // hurting compile time and will be removed eventually anyway.
-    if (DisableHexagonMISched)
-      disablePass(&MachineSchedulerID);
-    else
-      enablePass(&MachineSchedulerID);
+    bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None);
+    if (!NoOpt) {
+      if (EnableExpandCondsets) {
+        Pass *Exp = createHexagonExpandCondsets();
+        insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp));
+      }
+    }
   }
 
   HexagonTargetMachine &getHexagonTargetMachine() const {
@@ -138,33 +142,26 @@ void HexagonPassConfig::addPreRegAlloc() {
 }
 
 void HexagonPassConfig::addPostRegAlloc() {
-  const HexagonTargetMachine &TM = getHexagonTargetMachine();
   if (getOptLevel() != CodeGenOpt::None)
     if (!DisableHexagonCFGOpt)
-      addPass(createHexagonCFGOptimizer(TM), false);
+      addPass(createHexagonCFGOptimizer(), false);
 }
 
 void HexagonPassConfig::addPreSched2() {
-  const HexagonTargetMachine &TM = getHexagonTargetMachine();
-
   addPass(createHexagonCopyToCombine(), false);
   if (getOptLevel() != CodeGenOpt::None)
     addPass(&IfConverterID, false);
-  addPass(createHexagonSplitConst32AndConst64(TM));
+  addPass(createHexagonSplitConst32AndConst64());
 }
 
 void HexagonPassConfig::addPreEmitPass() {
-  const HexagonTargetMachine &TM = getHexagonTargetMachine();
   bool NoOpt = (getOptLevel() == CodeGenOpt::None);
 
   if (!NoOpt)
     addPass(createHexagonNewValueJump(), false);
 
   // Expand Spill code for predicate registers.
-  addPass(createHexagonExpandPredSpillCode(TM), false);
-
-  // Split up TFRcondsets into conditional transfers.
-  addPass(createHexagonSplitTFRCondSets(TM), false);
+  addPass(createHexagonExpandPredSpillCode(), false);
 
   // Create Packets.
   if (!NoOpt) {
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 4a9f44732a6b..5774f7e195b0 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -32,8 +32,7 @@ public:
                        Reloc::Model RM, CodeModel::Model CM,
                        CodeGenOpt::Level OL);
   ~HexagonTargetMachine() override;
-
-  const HexagonSubtarget *getSubtargetImpl() const override {
+  const HexagonSubtarget *getSubtargetImpl(const Function &) const override {
     return &Subtarget;
   }
   static unsigned getModuleMatchQuality(const Module &M);
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index f4ab5e2b5c42..4ea0e0d11998 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -33,14 +33,10 @@ void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
   TargetLoweringObjectFileELF::Initialize(Ctx, TM);
   InitializeELF(TM.Options.UseInitArray);
 
-  SmallDataSection =
-    getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
-                               ELF::SHF_WRITE | ELF::SHF_ALLOC,
-                               SectionKind::getDataRel());
-  SmallBSSSection =
-    getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
-                               ELF::SHF_WRITE | ELF::SHF_ALLOC,
-                               SectionKind::getBSS());
+  SmallDataSection = getContext().getELFSection(
+      ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+  SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+                                               ELF::SHF_WRITE | ELF::SHF_ALLOC);
 }
 
 // sdata/sbss support taken largely from the MIPS Backend.
@@ -79,14 +75,13 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
 
   if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
     Type *Ty = GV->getType()->getElementType();
-    return IsInSmallSection(
-        TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty));
+    return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
   }
 
   return false;
 }
 
-const MCSection *
+MCSection *
 HexagonTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
                                                 SectionKind Kind, Mangler &Mang,
                                                 const TargetMachine &TM) const {
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
index c97420427240..da0eeeb3fd28 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -16,8 +16,9 @@
 namespace llvm {
 
   class HexagonTargetObjectFile : public TargetLoweringObjectFileELF {
-    const MCSectionELF *SmallDataSection;
-    const MCSectionELF *SmallBSSSection;
+    MCSectionELF *SmallDataSection;
+    MCSectionELF *SmallBSSSection;
+
   public:
     void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
 
@@ -30,9 +31,9 @@ namespace llvm {
                                  const TargetMachine &TM) const;
 
     bool IsSmallDataEnabled () const;
-    const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
-                                            SectionKind Kind, Mangler &Mang,
-                                            const TargetMachine &TM) const override;
+    MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                                      Mangler &Mang,
+                                      const TargetMachine &TM) const override;
   };
 
 } // namespace llvm
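HexagonPassConfig now registers the condset expansion with insertPass(&RegisterCoalescerID, ...), which, as far as I can tell from TargetPassConfig, schedules the inserted pass relative to the standard coalescer rather than appending it to a fixed hook. A toy model of that mechanism, using strings instead of AnalysisIDs:

    #include <cassert>
    #include <string>
    #include <vector>

    // Toy pipeline: the target asks for its pass to run next to an anchor
    // pass, identified here by name (TargetPassConfig uses AnalysisIDs).
    std::vector<std::string> insertNextTo(std::vector<std::string> pipeline,
                                          const std::string &anchor,
                                          const std::string &newPass) {
      std::vector<std::string> out;
      for (const std::string &p : pipeline) {
        out.push_back(p);
        if (p == anchor)
          out.push_back(newPass); // runs once the anchor pass has been added
      }
      return out;
    }

    int main() {
      auto pipe = insertNextTo({"TwoAddress", "RegisterCoalescer", "RegAlloc"},
                               "RegisterCoalescer", "HexagonExpandCondsets");
      assert(pipe[1] == "RegisterCoalescer");
      assert(pipe[2] == "HexagonExpandCondsets");
      return 0;
    }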
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index c9605278e045..0cc59bcc7671 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -264,8 +264,7 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
 
 static bool IsIndirectCall(MachineInstr* MI) {
-  return ((MI->getOpcode() == Hexagon::J2_callr) ||
-          (MI->getOpcode() == Hexagon::CALLRv3));
+  return MI->getOpcode() == Hexagon::J2_callr;
 }
 
 // Reserve resources for constant extender. Trigger an assertion if
@@ -371,7 +370,7 @@ static bool IsDirectJump(MachineInstr* MI) {
 
 static bool IsSchedBarrier(MachineInstr* MI) {
   switch (MI->getOpcode()) {
-  case Hexagon::BARRIER:
+  case Hexagon::Y2_barrier:
     return true;
   }
   return false;
@@ -390,7 +389,9 @@ static bool IsLoopN(MachineInstr *MI) {
 /// callee-saved register.
 static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
                                      const TargetRegisterInfo *TRI) {
-  for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) {
+  for (const MCPhysReg *CSR =
+           TRI->getCalleeSavedRegs(MI->getParent()->getParent());
+       *CSR; ++CSR) {
     unsigned CalleeSavedReg = *CSR;
     if (MI->modifiesRegister(CalleeSavedReg, TRI))
       return true;
@@ -402,10 +403,7 @@ static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
 // or new-value store.
 bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
   const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  if ( isCondInst(MI) || QII->mayBeNewStore(MI))
-    return true;
-  else
-    return false;
+  return isCondInst(MI) || QII->mayBeNewStore(MI);
 }
 
 bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
@@ -721,10 +719,7 @@ bool HexagonPacketizerList::CanPromoteToNewValue(
                 MachineBasicBlock::iterator &MII) {
 
   const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  const HexagonRegisterInfo *QRI =
-      (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
-  if (!QRI->Subtarget.hasV4TOps() ||
-      !QII->mayBeNewStore(MI))
+  if (!QII->mayBeNewStore(MI))
     return false;
 
   MachineInstr *PacketMI = PacketSU->getInstr();
@@ -955,6 +950,9 @@ bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
   if (MI->isDebugValue())
     return true;
 
+  if (MI->isCFIInstruction())
+    return false;
+
   // We must print out inline assembly
   if (MI->isInlineAsm())
     return false;
@@ -972,11 +970,10 @@ bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
 // isSoloInstruction: - Returns true for instructions that must be
 // scheduled in their own packet.
 bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) {
-
-  if (MI->isInlineAsm())
+  if (MI->isEHLabel() || MI->isCFIInstruction())
     return true;
 
-  if (MI->isEHLabel())
+  if (MI->isInlineAsm())
     return true;
 
   // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
@@ -1055,84 +1052,82 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
     // first store is not in SLOT0. New value store, new value jump,
     // dealloc_return and memop always take SLOT0.
     // Arch spec 3.4.4.2
-    if (QRI->Subtarget.hasV4TOps()) {
-      if (MCIDI.mayStore() && MCIDJ.mayStore() &&
-          (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
-        Dependence = true;
-        return false;
-      }
+    if (MCIDI.mayStore() && MCIDJ.mayStore() &&
+        (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
+      Dependence = true;
+      return false;
+    }
 
-      if ((QII->isMemOp(J) && MCIDI.mayStore())
-          || (MCIDJ.mayStore() && QII->isMemOp(I))
-          || (QII->isMemOp(J) && QII->isMemOp(I))) {
-        Dependence = true;
-        return false;
-      }
+    if ((QII->isMemOp(J) && MCIDI.mayStore())
+        || (MCIDJ.mayStore() && QII->isMemOp(I))
+        || (QII->isMemOp(J) && QII->isMemOp(I))) {
+      Dependence = true;
+      return false;
+    }
 
-      //if dealloc_return
-      if (MCIDJ.mayStore() && QII->isDeallocRet(I)) {
-        Dependence = true;
-        return false;
-      }
+    //if dealloc_return
+    if (MCIDJ.mayStore() && QII->isDeallocRet(I)) {
+      Dependence = true;
+      return false;
+    }
 
-      // If an instruction feeds new value jump, glue it.
-      MachineBasicBlock::iterator NextMII = I;
-      ++NextMII;
-      if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
-        MachineInstr *NextMI = NextMII;
+    // If an instruction feeds new value jump, glue it.
+    MachineBasicBlock::iterator NextMII = I;
+    ++NextMII;
+    if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
+      MachineInstr *NextMI = NextMII;
 
-        bool secondRegMatch = false;
-        bool maintainNewValueJump = false;
+      bool secondRegMatch = false;
+      bool maintainNewValueJump = false;
 
-        if (NextMI->getOperand(1).isReg() &&
-            I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
-          secondRegMatch = true;
-          maintainNewValueJump = true;
-        }
+      if (NextMI->getOperand(1).isReg() &&
+          I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
+        secondRegMatch = true;
+        maintainNewValueJump = true;
+      }
 
-        if (!secondRegMatch &&
-            I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
-          maintainNewValueJump = true;
-        }
+      if (!secondRegMatch &&
+          I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
+        maintainNewValueJump = true;
+      }
 
-        for (std::vector<MachineInstr*>::iterator
-              VI = CurrentPacketMIs.begin(),
-              VE = CurrentPacketMIs.end();
-              (VI != VE && maintainNewValueJump); ++VI) {
-          SUnit *PacketSU = MIToSUnit.find(*VI)->second;
+      for (std::vector<MachineInstr*>::iterator
+            VI = CurrentPacketMIs.begin(),
+            VE = CurrentPacketMIs.end();
+            (VI != VE && maintainNewValueJump); ++VI) {
+        SUnit *PacketSU = MIToSUnit.find(*VI)->second;
 
-          // NVJ can not be part of the dual jump - Arch Spec: section 7.8
-          if (PacketSU->getInstr()->getDesc().isCall()) {
-            Dependence = true;
-            break;
-          }
-          // Validate
-          // 1. Packet does not have a store in it.
-          // 2. If the first operand of the nvj is newified, and the second
-          //    operand is also a reg, it (second reg) is not defined in
-          //    the same packet.
-          // 3. If the second operand of the nvj is newified, (which means
-          //    first operand is also a reg), first reg is not defined in
-          //    the same packet.
-          if (PacketSU->getInstr()->getDesc().mayStore() ||
-              PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
-              // Check #2.
-              (!secondRegMatch && NextMI->getOperand(1).isReg() &&
-               PacketSU->getInstr()->modifiesRegister(
-                               NextMI->getOperand(1).getReg(), QRI)) ||
-              // Check #3.
-              (secondRegMatch &&
-               PacketSU->getInstr()->modifiesRegister(
-                               NextMI->getOperand(0).getReg(), QRI))) {
-            Dependence = true;
-            break;
-          }
+        // NVJ can not be part of the dual jump - Arch Spec: section 7.8
+        if (PacketSU->getInstr()->getDesc().isCall()) {
+          Dependence = true;
+          break;
+        }
+        // Validate
+        // 1. Packet does not have a store in it.
+        // 2. If the first operand of the nvj is newified, and the second
+        //    operand is also a reg, it (second reg) is not defined in
+        //    the same packet.
+        // 3. If the second operand of the nvj is newified, (which means
+        //    first operand is also a reg), first reg is not defined in
+        //    the same packet.
+        if (PacketSU->getInstr()->getDesc().mayStore() ||
+            PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
+            // Check #2.
+            (!secondRegMatch && NextMI->getOperand(1).isReg() &&
+             PacketSU->getInstr()->modifiesRegister(
+                             NextMI->getOperand(1).getReg(), QRI)) ||
+            // Check #3.
+            (secondRegMatch &&
+             PacketSU->getInstr()->modifiesRegister(
+                             NextMI->getOperand(0).getReg(), QRI))) {
+          Dependence = true;
+          break;
        }
-        if (!Dependence)
-          GlueToNewValueJump = true;
-        else
-          return false;
      }
+      if (!Dependence)
+        GlueToNewValueJump = true;
+      else
+        return false;
    }
 
   if (SUJ->isSucc(SUI)) {
@@ -1254,9 +1249,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
     else if ((DepType == SDep::Order) &&
              !I->hasOrderedMemoryRef() &&
              !J->hasOrderedMemoryRef()) {
-      if (QRI->Subtarget.hasV4TOps() &&
-          // hexagonv4 allows dual store.
-          MCIDI.mayStore() && MCIDJ.mayStore()) {
+      if (MCIDI.mayStore() && MCIDJ.mayStore()) {
         /* do nothing */
       }
       // store followed by store-- not OK on V2
@@ -1278,7 +1271,6 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
     // packetized in a same packet. This implies that the store is using
     // caller's SP. Hence, offset needs to be updated accordingly.
     else if (DepType == SDep::Data
-             && QRI->Subtarget.hasV4TOps()
             && J->getOpcode() == Hexagon::S2_allocframe
             && (I->getOpcode() == Hexagon::S2_storerd_io
                 || I->getOpcode() == Hexagon::S2_storeri_io
diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
deleted file mode 100644
index edbe29a5344a..000000000000
--- a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
+++ /dev/null
@@ -1,149 +0,0 @@
-//===-- HexagonVarargsCallingConvention.h - Calling Conventions -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the functions that assign locations to outgoing function
-// arguments. Adapted from the target independent version but this handles
-// calls to varargs functions
-//
-//===----------------------------------------------------------------------===//
-//
-
-
-
-
-static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
-                                    EVT LocVT, CCValAssign::LocInfo LocInfo,
-                                    ISD::ArgFlagsTy ArgFlags,
-                                    Hexagon_CCState &State,
-                                    int NonVarArgsParams,
-                                    int CurrentParam,
-                                    bool ForceMem);
-
-static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
-                                 EVT LocVT, CCValAssign::LocInfo LocInfo,
-                                 ISD::ArgFlagsTy ArgFlags,
-                                 Hexagon_CCState &State,
-                                 int NonVarArgsParams,
-                                 int CurrentParam,
-                                 bool ForceMem) {
-  unsigned ByValSize = 0;
-  if (ArgFlags.isByVal() &&
-      ((ByValSize = ArgFlags.getByValSize()) >
-       (MVT(MVT::i64).getSizeInBits() / 8))) {
-    ForceMem = true;
-  }
-
-
-  // Only assign registers for named (non-varargs) arguments
-  if ( !ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <=
-                                                  NonVarArgsParams))) {
-
-    if (LocVT == MVT::i32 ||
-        LocVT == MVT::i16 ||
-        LocVT == MVT::i8 ||
-        LocVT == MVT::f32) {
-      static const unsigned RegList1[] = {
-        Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
-        Hexagon::R5
-      };
-      if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
-        State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
-                                         LocVT.getSimpleVT(), LocInfo));
-        return false;
-      }
-    }
-
-    if (LocVT == MVT::i64 ||
-        LocVT == MVT::f64) {
-      static const unsigned RegList2[] = {
-        Hexagon::D0, Hexagon::D1, Hexagon::D2
-      };
-      if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
-        State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
-                                         LocVT.getSimpleVT(), LocInfo));
-        return false;
-      }
-    }
-  }
-
-  const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
-  unsigned Alignment = State.getTarget()
-                           .getSubtargetImpl()
-                           ->getDataLayout()
-                           ->getABITypeAlignment(ArgTy);
-  unsigned Size =
-      State.getTarget().getSubtargetImpl()->getDataLayout()->getTypeSizeInBits(
-          ArgTy) /
-      8;
-
-  // If it's passed by value, then we need the size of the aggregate not of
-  // the pointer.
-  if (ArgFlags.isByVal()) {
-    Size = ByValSize;
-
-    // Hexagon_TODO: Get the alignment of the contained type here.
- Alignment = 8; - } - - unsigned Offset3 = State.AllocateStack(Size, Alignment); - State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3, - LocVT.getSimpleVT(), LocInfo)); - return false; -} - - -static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - Hexagon_CCState &State, - int NonVarArgsParams, - int CurrentParam, - bool ForceMem) { - - if (LocVT == MVT::i32 || - LocVT == MVT::f32) { - static const unsigned RegList1[] = { - Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, - Hexagon::R5 - }; - if (unsigned Reg = State.AllocateReg(RegList1, 6)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg, - LocVT.getSimpleVT(), LocInfo)); - return false; - } - } - - if (LocVT == MVT::i64 || - LocVT == MVT::f64) { - static const unsigned RegList2[] = { - Hexagon::D0, Hexagon::D1, Hexagon::D2 - }; - if (unsigned Reg = State.AllocateReg(RegList2, 3)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg, - LocVT.getSimpleVT(), LocInfo)); - return false; - } - } - - const Type* ArgTy = LocVT.getTypeForEVT(State.getContext()); - unsigned Alignment = State.getTarget() - .getSubtargetImpl() - ->getDataLayout() - ->getABITypeAlignment(ArgTy); - unsigned Size = - State.getTarget().getSubtargetImpl()->getDataLayout()->getTypeSizeInBits( - ArgTy) / - 8; - - unsigned Offset3 = State.AllocateStack(Size, Alignment); - State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3, - LocVT.getSimpleVT(), LocInfo)); - return false; -} diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt index 2a6124ee0c5a..4c987ed32a64 100644 --- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt @@ -4,7 +4,7 @@ add_llvm_library(LLVMHexagonDesc HexagonInstPrinter.cpp HexagonMCAsmInfo.cpp HexagonMCCodeEmitter.cpp - HexagonMCInst.cpp + HexagonMCInstrInfo.cpp HexagonMCTargetDesc.cpp ) diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index bdccf880d65f..155aa9ef9557 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -57,7 +57,7 @@ public: ELFHexagonAsmBackend(Target const &T, uint8_t OSABI) : HexagonAsmBackend(T), OSABI(OSABI) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { StringRef CPU("HexagonV4"); return createHexagonELFObjectWriter(OS, OSABI, CPU); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 8e02f799d7e4..6a72f205e9d3 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -70,7 +70,7 @@ namespace HexagonII { PostInc = 6 // Post increment addressing mode }; - enum MemAccessSize { + enum class MemAccessSize { NoMemAccess = 0, // Not a memory access instruction. ByteAccess = 1, // Byte access instruction (memb). HalfWordAccess = 2, // Half word access instruction (memh).
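The HexagonBaseInfo.h hunk just above turns MemAccessSize into a scoped enum, which is why later hunks in this import spell out HexagonII::MemAccessSize::ByteAccess and can no longer treat the value as a plain int. A short standalone sketch of what that change buys; the enumerators past HalfWordAccess are assumed to continue the sequence shown in the hunk:

#include <iostream>

namespace HexagonII {
// Scoped: enumerators no longer leak into the enclosing namespace and no
// longer convert implicitly to int.
enum class MemAccessSize {
  NoMemAccess = 0,
  ByteAccess = 1,       // memb
  HalfWordAccess = 2,   // memh
  WordAccess = 3,       // assumed: memw
  DoubleWordAccess = 4  // assumed: memd
};
} // namespace HexagonII

int main() {
  HexagonII::MemAccessSize S = HexagonII::MemAccessSize::ByteAccess;
  // int I = S;              // ill-formed with enum class: no implicit conversion
  int I = static_cast<int>(S); // an explicit cast is now required
  std::cout << I << '\n';      // prints 1
  return 0;
}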
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index 56c9dc712a6e..fde935b2758b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "hexagon-elf-writer" @@ -26,8 +27,8 @@ private: public: HexagonELFObjectWriter(uint8_t OSABI, StringRef C); - virtual unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup, - bool IsPCRel) const override; + unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup, + bool IsPCRel) const override; }; } @@ -54,9 +55,9 @@ unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/, return Type; } -MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, StringRef CPU) { MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true); -}
\ No newline at end of file +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h new file mode 100644 index 000000000000..4bbfbec883c4 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h @@ -0,0 +1,137 @@ +//===-- HexagonFixupKinds.h - Hexagon Specific Fixup Entries --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_HEXAGON_HEXAGONFIXUPKINDS_H +#define LLVM_HEXAGON_HEXAGONFIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace Hexagon { +enum Fixups { + // Branch fixups for R_HEX_B{22,15,7}_PCREL. + fixup_Hexagon_B22_PCREL = FirstTargetFixupKind, + fixup_Hexagon_B15_PCREL, + fixup_Hexagon_B7_PCREL, + fixup_Hexagon_LO16, + fixup_Hexagon_HI16, + fixup_Hexagon_32, + fixup_Hexagon_16, + fixup_Hexagon_8, + fixup_Hexagon_GPREL16_0, + fixup_Hexagon_GPREL16_1, + fixup_Hexagon_GPREL16_2, + fixup_Hexagon_GPREL16_3, + fixup_Hexagon_HL16, + fixup_Hexagon_B13_PCREL, + fixup_Hexagon_B9_PCREL, + fixup_Hexagon_B32_PCREL_X, + fixup_Hexagon_32_6_X, + fixup_Hexagon_B22_PCREL_X, + fixup_Hexagon_B15_PCREL_X, + fixup_Hexagon_B13_PCREL_X, + fixup_Hexagon_B9_PCREL_X, + fixup_Hexagon_B7_PCREL_X, + fixup_Hexagon_16_X, + fixup_Hexagon_12_X, + fixup_Hexagon_11_X, + fixup_Hexagon_10_X, + fixup_Hexagon_9_X, + fixup_Hexagon_8_X, + fixup_Hexagon_7_X, + fixup_Hexagon_6_X, + fixup_Hexagon_32_PCREL, + fixup_Hexagon_COPY, + fixup_Hexagon_GLOB_DAT, + fixup_Hexagon_JMP_SLOT, + fixup_Hexagon_RELATIVE, + fixup_Hexagon_PLT_B22_PCREL, + fixup_Hexagon_GOTREL_LO16, + fixup_Hexagon_GOTREL_HI16, + fixup_Hexagon_GOTREL_32, + fixup_Hexagon_GOT_LO16, + fixup_Hexagon_GOT_HI16, + fixup_Hexagon_GOT_32, + fixup_Hexagon_GOT_16, + fixup_Hexagon_DTPMOD_32, + fixup_Hexagon_DTPREL_LO16, + fixup_Hexagon_DTPREL_HI16, + fixup_Hexagon_DTPREL_32, + fixup_Hexagon_DTPREL_16, + fixup_Hexagon_GD_PLT_B22_PCREL, + fixup_Hexagon_LD_PLT_B22_PCREL, + fixup_Hexagon_GD_GOT_LO16, + fixup_Hexagon_GD_GOT_HI16, + fixup_Hexagon_GD_GOT_32, + fixup_Hexagon_GD_GOT_16, + fixup_Hexagon_LD_GOT_LO16, + fixup_Hexagon_LD_GOT_HI16, + fixup_Hexagon_LD_GOT_32, + fixup_Hexagon_LD_GOT_16, + fixup_Hexagon_IE_LO16, + fixup_Hexagon_IE_HI16, + fixup_Hexagon_IE_32, + fixup_Hexagon_IE_16, + fixup_Hexagon_IE_GOT_LO16, + fixup_Hexagon_IE_GOT_HI16, + fixup_Hexagon_IE_GOT_32, + fixup_Hexagon_IE_GOT_16, + fixup_Hexagon_TPREL_LO16, + fixup_Hexagon_TPREL_HI16, + fixup_Hexagon_TPREL_32, + fixup_Hexagon_TPREL_16, + fixup_Hexagon_6_PCREL_X, + fixup_Hexagon_GOTREL_32_6_X, + fixup_Hexagon_GOTREL_16_X, + fixup_Hexagon_GOTREL_11_X, + fixup_Hexagon_GOT_32_6_X, + fixup_Hexagon_GOT_16_X, + fixup_Hexagon_GOT_11_X, + fixup_Hexagon_DTPREL_32_6_X, + fixup_Hexagon_DTPREL_16_X, + fixup_Hexagon_DTPREL_11_X, + fixup_Hexagon_GD_GOT_32_6_X, + fixup_Hexagon_GD_GOT_16_X, + fixup_Hexagon_GD_GOT_11_X, + fixup_Hexagon_LD_GOT_32_6_X, + fixup_Hexagon_LD_GOT_16_X, + fixup_Hexagon_LD_GOT_11_X, + fixup_Hexagon_IE_32_6_X, + fixup_Hexagon_IE_16_X, + fixup_Hexagon_IE_GOT_32_6_X, + fixup_Hexagon_IE_GOT_16_X, + fixup_Hexagon_IE_GOT_11_X, + fixup_Hexagon_TPREL_32_6_X, + fixup_Hexagon_TPREL_16_X, + fixup_Hexagon_TPREL_11_X, + + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +enum FixupBitmaps : unsigned { + Word8 = 0xff, + Word16 = 0xffff, + Word32 = 
0xffffffff, + Word32_LO = 0x00c03fff, + Word32_HL = 0x0, // Not Implemented + Word32_GP = 0x0, // Not Implemented + Word32_B7 = 0x00001f18, + Word32_B9 = 0x003000fe, + Word32_B13 = 0x00202ffe, + Word32_B15 = 0x00df20fe, + Word32_B22 = 0x01ff3ffe, + Word32_R6 = 0x000007e0, + Word32_U6 = 0x0, // Not Implemented + Word32_U16 = 0x0, // Not Implemented + Word32_X26 = 0x0fff3fff +}; +} // namespace Hexagon +} // namespace llvm + +#endif // LLVM_HEXAGON_HEXAGONFIXUPKINDS_H diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index 5c6f0363f78a..15cda717cf1c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -14,7 +14,7 @@ #include "HexagonAsmPrinter.h" #include "Hexagon.h" #include "HexagonInstPrinter.h" -#include "MCTargetDesc/HexagonMCInst.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" @@ -73,50 +73,46 @@ StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const { return MII.getName(Opcode); } -StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { - return getRegisterName(RegNo); +void HexagonInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << getRegisterName(RegNo); } -void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - printInst((const HexagonMCInst*)(MI), O, Annot); -} - -void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O, - StringRef Annot) { +void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O, + StringRef Annot, + const MCSubtargetInfo &STI) { const char startPacket = '{', endPacket = '}'; // TODO: add outer HW loop when it's supported too. if (MI->getOpcode() == Hexagon::ENDLOOP0) { // Ending a hardware loop is different from ending a regular packet. - assert(MI->isPacketEnd() && "Loop-end must also end the packet"); + assert(HexagonMCInstrInfo::isPacketEnd(*MI) && "Loop-end must also end the packet"); - if (MI->isPacketBegin()) { + if (HexagonMCInstrInfo::isPacketBegin(*MI)) { // There must be a packet to end a loop. // FIXME: when shuffling is always run, this shouldn't be needed. - HexagonMCInst Nop; + MCInst Nop; StringRef NoAnnot; Nop.setOpcode (Hexagon::A2_nop); - Nop.setPacketBegin (MI->isPacketBegin()); - printInst (&Nop, O, NoAnnot); + HexagonMCInstrInfo::setPacketBegin (Nop, HexagonMCInstrInfo::isPacketBegin(*MI)); + printInst (&Nop, O, NoAnnot, STI); } // Close the packet. - if (MI->isPacketEnd()) + if (HexagonMCInstrInfo::isPacketEnd(*MI)) O << PacketPadding << endPacket; printInstruction(MI, O); } else { // Prefix the insn opening the packet. - if (MI->isPacketBegin()) + if (HexagonMCInstrInfo::isPacketBegin(*MI)) O << PacketPadding << startPacket << '\n'; printInstruction(MI, O); // Suffix the insn closing the packet. - if (MI->isPacketEnd()) + if (HexagonMCInstrInfo::isPacketEnd(*MI)) // Suffix the packet in a new line always, since the GNU assembler has // issues with a closing brace on the same line as CONST{32,64}.
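The printInst rewrite above keeps the packet-bracketing behavior: open a brace before the first instruction of a packet and close it on its own line after the last one, with ENDLOOP0 handled specially. A toy standalone model of that bracketing, using simple begin/end flags in place of the implicit-operand queries:

#include <iostream>
#include <string>
#include <vector>

// Toy instruction: text plus the packet markers the printer keys on.
struct Inst {
  std::string Text;
  bool PacketBegin = false;
  bool PacketEnd = false;
};

static void printPacketized(const std::vector<Inst> &Insts, std::ostream &OS) {
  const char *Pad = "\t"; // stand-in for PacketPadding
  for (const Inst &I : Insts) {
    if (I.PacketBegin)
      OS << Pad << "{\n";        // prefix the insn opening the packet
    OS << Pad << I.Text << '\n';
    if (I.PacketEnd)
      OS << Pad << "}\n";        // always close on a new line
  }
}

int main() {
  std::vector<Inst> Packet = {{"r0 = add(r1, r2)", true, false},
                              {"memw(r3) = r0.new", false, true}};
  printPacketized(Packet, std::cout);
  return 0;
}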
O << '\n' << PacketPadding << endPacket; @@ -130,7 +126,7 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCOperand& MO = MI->getOperand(OpNo); if (MO.isReg()) { - O << getRegisterName(MO.getReg()); + printRegName(O, MO.getReg()); } else if(MO.isExpr()) { O << *MO.getExpr(); } else if(MO.isImm()) { @@ -192,7 +188,7 @@ void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo, const MCOperand& MO0 = MI->getOperand(OpNo); const MCOperand& MO1 = MI->getOperand(OpNo + 1); - O << getRegisterName(MO0.getReg()); + printRegName(O, MO0.getReg()); O << " + #" << MO1.getImm(); } @@ -201,7 +197,8 @@ void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo, const MCOperand& MO0 = MI->getOperand(OpNo); const MCOperand& MO1 = MI->getOperand(OpNo + 1); - O << getRegisterName(MO0.getReg()) << ", #" << MO1.getImm(); + printRegName(O, MO0.getReg()); + O << ", #" << MO1.getImm(); } void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo, @@ -252,3 +249,17 @@ void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo, printOperand(MI, OpNo, O); O << ')'; } + +void HexagonInstPrinter::printExtBrtarget(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand &MO = MI->getOperand(OpNo); + const MCInstrDesc &MII = getMII().get(MI->getOpcode()); + + assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) && + "Expecting an extendable operand"); + + if (MO.isExpr() || isExtended(MII.TSFlags)) { + O << "##"; + } + printOperand(MI, OpNo, O); +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h index 55ae95cd06df..3fedaed8fbf9 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -18,20 +18,18 @@ #include "llvm/MC/MCInstrInfo.h" namespace llvm { - class HexagonMCInst; - class HexagonInstPrinter : public MCInstPrinter { public: - explicit HexagonInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI) + explicit HexagonInstPrinter(MCAsmInfo const &MAI, + MCInstrInfo const &MII, + MCRegisterInfo const &MRI) : MCInstPrinter(MAI, MII, MRI), MII(MII) {} - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; - void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot); + void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; virtual StringRef getOpcodeName(unsigned Opcode) const; void printInstruction(const MCInst *MI, raw_ostream &O); - StringRef getRegName(unsigned RegNo) const; + void printRegName(raw_ostream &OS, unsigned RegNo) const override; static const char *getRegisterName(unsigned RegNo); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; @@ -58,6 +56,7 @@ namespace llvm { void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + void printExtBrtarget(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; void printConstantPool(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 487872a1f5dd..ae3953abba10 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -9,8 +9,9 @@ #include 
"Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonFixupKinds.h" #include "MCTargetDesc/HexagonMCCodeEmitter.h" -#include "MCTargetDesc/HexagonMCInst.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" @@ -35,10 +36,11 @@ namespace { /// Possible values for instruction packet parse field. enum class ParseField { duplex = 0x0, last0 = 0x1, last1 = 0x2, end = 0x3 }; /// \brief Returns the packet bits based on instruction position. -uint32_t getPacketBits(HexagonMCInst const &HMI) { +uint32_t getPacketBits(MCInst const &HMI) { unsigned const ParseFieldOffset = 14; - ParseField Field = HMI.isPacketEnd() ? ParseField::end : ParseField::last0; - return static_cast <uint32_t> (Field) << ParseFieldOffset; + ParseField Field = HexagonMCInstrInfo::isPacketEnd(HMI) ? ParseField::end + : ParseField::last0; + return static_cast<uint32_t>(Field) << ParseFieldOffset; } void emitLittleEndian(uint64_t Binary, raw_ostream &OS) { OS << static_cast<uint8_t>((Binary >> 0x00) & 0xff); @@ -49,20 +51,455 @@ void emitLittleEndian(uint64_t Binary, raw_ostream &OS) { } HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII, - MCSubtargetInfo const &aMST, MCContext &aMCT) - : MST(aMST), MCT(aMCT) {} + : MCT(aMCT), MCII(aMII), Addend(new unsigned(0)), + Extended(new bool(false)) {} -void HexagonMCCodeEmitter::EncodeInstruction(MCInst const &MI, raw_ostream &OS, +void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, MCSubtargetInfo const &STI) const { - HexagonMCInst const &HMB = static_cast<HexagonMCInst const &>(MI); - uint64_t Binary = getBinaryCodeForInstr(HMB, Fixups, STI) | getPacketBits(HMB); - assert(HMB.getDesc().getSize() == 4 && "All instructions should be 32bit"); + uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI) | getPacketBits(MI); + assert(HexagonMCInstrInfo::getDesc(MCII, MI).getSize() == 4 && + "All instructions should be 32bit"); + (void)&MCII; emitLittleEndian(Binary, OS); ++MCNumEmitted; } +static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, + const MCOperand &MO, + const MCSymbolRefExpr::VariantKind kind) { + const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI); + unsigned insnType = llvm::HexagonMCInstrInfo::getType(MCII, MI); + + if (insnType == HexagonII::TypePREFIX) { + switch (kind) { + case llvm::MCSymbolRefExpr::VK_GOTOFF: + return Hexagon::fixup_Hexagon_GOTREL_32_6_X; + case llvm::MCSymbolRefExpr::VK_GOT: + return Hexagon::fixup_Hexagon_GOT_32_6_X; + case llvm::MCSymbolRefExpr::VK_TPREL: + return Hexagon::fixup_Hexagon_TPREL_32_6_X; + case llvm::MCSymbolRefExpr::VK_DTPREL: + return Hexagon::fixup_Hexagon_DTPREL_32_6_X; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + return Hexagon::fixup_Hexagon_GD_GOT_32_6_X; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + return Hexagon::fixup_Hexagon_LD_GOT_32_6_X; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + return Hexagon::fixup_Hexagon_IE_32_6_X; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + return Hexagon::fixup_Hexagon_IE_GOT_32_6_X; + default: + if (MCID.isBranch()) + return Hexagon::fixup_Hexagon_B32_PCREL_X; + else + return Hexagon::fixup_Hexagon_32_6_X; + } + } else if (MCID.isBranch()) + return (Hexagon::fixup_Hexagon_B13_PCREL); + + switch (MCID.getOpcode()) { + case Hexagon::HI: + case Hexagon::A2_tfrih: + switch (kind) { + case llvm::MCSymbolRefExpr::VK_GOT: + return 
Hexagon::fixup_Hexagon_GOT_HI16; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + return Hexagon::fixup_Hexagon_GOTREL_HI16; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + return Hexagon::fixup_Hexagon_GD_GOT_HI16; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + return Hexagon::fixup_Hexagon_LD_GOT_HI16; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + return Hexagon::fixup_Hexagon_IE_HI16; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + return Hexagon::fixup_Hexagon_IE_GOT_HI16; + case llvm::MCSymbolRefExpr::VK_TPREL: + return Hexagon::fixup_Hexagon_TPREL_HI16; + case llvm::MCSymbolRefExpr::VK_DTPREL: + return Hexagon::fixup_Hexagon_DTPREL_HI16; + default: + return Hexagon::fixup_Hexagon_HI16; + } + + case Hexagon::LO: + case Hexagon::A2_tfril: + switch (kind) { + case llvm::MCSymbolRefExpr::VK_GOT: + return Hexagon::fixup_Hexagon_GOT_LO16; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + return Hexagon::fixup_Hexagon_GOTREL_LO16; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + return Hexagon::fixup_Hexagon_GD_GOT_LO16; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + return Hexagon::fixup_Hexagon_LD_GOT_LO16; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + return Hexagon::fixup_Hexagon_IE_LO16; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + return Hexagon::fixup_Hexagon_IE_GOT_LO16; + case llvm::MCSymbolRefExpr::VK_TPREL: + return Hexagon::fixup_Hexagon_TPREL_LO16; + case llvm::MCSymbolRefExpr::VK_DTPREL: + return Hexagon::fixup_Hexagon_DTPREL_LO16; + default: + return Hexagon::fixup_Hexagon_LO16; + } + + // The only relocs left should be GP relative: + default: + if (MCID.mayStore() || MCID.mayLoad()) { + for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; + ++ImpUses) { + if (*ImpUses == Hexagon::GP) { + switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) { + case HexagonII::MemAccessSize::ByteAccess: + return fixup_Hexagon_GPREL16_0; + case HexagonII::MemAccessSize::HalfWordAccess: + return fixup_Hexagon_GPREL16_1; + case HexagonII::MemAccessSize::WordAccess: + return fixup_Hexagon_GPREL16_2; + case HexagonII::MemAccessSize::DoubleWordAccess: + return fixup_Hexagon_GPREL16_3; + default: + llvm_unreachable("unhandled fixup"); + } + } + } + } else + llvm_unreachable("unhandled fixup"); + } + + return LastTargetFixupKind; +} + +unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, + const MCOperand &MO, + const MCExpr *ME, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const + +{ + int64_t Res; + + if (ME->EvaluateAsAbsolute(Res)) + return Res; + + MCExpr::ExprKind MK = ME->getKind(); + if (MK == MCExpr::Constant) { + return cast<MCConstantExpr>(ME)->getValue(); + } + if (MK == MCExpr::Binary) { + unsigned Res; + Res = getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI); + Res += + getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI); + return Res; + } + + assert(MK == MCExpr::SymbolRef); + + Hexagon::Fixups FixupKind = + Hexagon::Fixups(Hexagon::fixup_Hexagon_TPREL_LO16); + const MCSymbolRefExpr *MCSRE = static_cast<const MCSymbolRefExpr *>(ME); + const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI); + unsigned bits = HexagonMCInstrInfo::getExtentBits(MCII, MI) - + HexagonMCInstrInfo::getExtentAlignment(MCII, MI); + const MCSymbolRefExpr::VariantKind kind = MCSRE->getKind(); + + DEBUG(dbgs() << "----------------------------------------\n"); + DEBUG(dbgs() << "Opcode Name: " << HexagonMCInstrInfo::getName(MCII, MI) + << "\n"); + DEBUG(dbgs() << "Opcode: " << MCID.getOpcode() << "\n"); + 
DEBUG(dbgs() << "Relocation bits: " << bits << "\n"); + DEBUG(dbgs() << "Addend: " << *Addend << "\n"); + DEBUG(dbgs() << "----------------------------------------\n"); + + switch (bits) { + default: + DEBUG(dbgs() << "unrecognized bit count of " << bits << '\n'); + break; + + case 32: + switch (kind) { + case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL: + FixupKind = Hexagon::fixup_Hexagon_32_PCREL; + break; + case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOT_32_6_X + : Hexagon::fixup_Hexagon_GOT_32; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOTREL_32_6_X + : Hexagon::fixup_Hexagon_GOTREL_32; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_GOT_32_6_X + : Hexagon::fixup_Hexagon_GD_GOT_32; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_GOT_32_6_X + : Hexagon::fixup_Hexagon_LD_GOT_32; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_32_6_X + : Hexagon::fixup_Hexagon_IE_32; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_GOT_32_6_X + : Hexagon::fixup_Hexagon_IE_GOT_32; + break; + case llvm::MCSymbolRefExpr::VK_TPREL: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_TPREL_32_6_X + : Hexagon::fixup_Hexagon_TPREL_32; + break; + case llvm::MCSymbolRefExpr::VK_DTPREL: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_DTPREL_32_6_X + : Hexagon::fixup_Hexagon_DTPREL_32; + break; + default: + FixupKind = + *Extended ? Hexagon::fixup_Hexagon_32_6_X : Hexagon::fixup_Hexagon_32; + break; + } + break; + + case 22: + switch (kind) { + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_PLT: + FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_PLT: + FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL; + break; + default: + if (MCID.isBranch() || MCID.isCall()) { + FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_B22_PCREL_X + : Hexagon::fixup_Hexagon_B22_PCREL; + } else { + errs() << "unrecognized relocation, bits: " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + } + break; + + case 16: + if (*Extended) { + switch (kind) { + default: + FixupKind = Hexagon::fixup_Hexagon_16_X; + break; + case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = Hexagon::fixup_Hexagon_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + FixupKind = Hexagon::fixup_Hexagon_IE_16_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_TPREL: + FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X; + break; + case llvm::MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X; + break; + } + } else + switch (kind) { + default: + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + if ((MCID.getOpcode() == Hexagon::HI) || + (MCID.getOpcode() == Hexagon::LO_H)) + FixupKind = Hexagon::fixup_Hexagon_GOTREL_HI16; + else + FixupKind = Hexagon::fixup_Hexagon_GOTREL_LO16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GPREL: + FixupKind = Hexagon::fixup_Hexagon_GPREL16_0; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LO16: + FixupKind = Hexagon::fixup_Hexagon_LO16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_HI16: + FixupKind = Hexagon::fixup_Hexagon_HI16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16; + break; + case llvm::MCSymbolRefExpr::VK_TPREL: + FixupKind = Hexagon::fixup_Hexagon_TPREL_16; + break; + case llvm::MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_16; + break; + } + break; + + case 15: + if (MCID.isBranch() || MCID.isCall()) + FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_B15_PCREL_X + : Hexagon::fixup_Hexagon_B15_PCREL; + break; + + case 13: + if (MCID.isBranch()) + FixupKind = Hexagon::fixup_Hexagon_B13_PCREL; + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 12: + if (*Extended) + switch (kind) { + default: + FixupKind = Hexagon::fixup_Hexagon_12_X; + break; + // There isn't a GOT_12_X, both 11_X and 16_X resolve to 6/26 + case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = Hexagon::fixup_Hexagon_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X; + break; + } + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 11: + if (*Extended) + switch (kind) { + default: + FixupKind = Hexagon::fixup_Hexagon_11_X; + break; + case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = Hexagon::fixup_Hexagon_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = Hexagon::fixup_Hexagon_GD_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = Hexagon::fixup_Hexagon_IE_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_TPREL: + FixupKind = Hexagon::fixup_Hexagon_TPREL_11_X; + break; + case llvm::MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_11_X; + break; + } + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 10: + if (*Extended) + FixupKind = Hexagon::fixup_Hexagon_10_X; + break; + + case 9: + if (MCID.isBranch() || + (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR)) + FixupKind = *Extended ? Hexagon::fixup_Hexagon_B9_PCREL_X + : Hexagon::fixup_Hexagon_B9_PCREL; + else if (*Extended) + FixupKind = Hexagon::fixup_Hexagon_9_X; + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 8: + if (*Extended) + FixupKind = Hexagon::fixup_Hexagon_8_X; + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 7: + if (MCID.isBranch() || + (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR)) + FixupKind = *Extended ? Hexagon::fixup_Hexagon_B7_PCREL_X + : Hexagon::fixup_Hexagon_B7_PCREL; + else if (*Extended) + FixupKind = Hexagon::fixup_Hexagon_7_X; + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 6: + if (*Extended) { + switch (kind) { + default: + FixupKind = Hexagon::fixup_Hexagon_6_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL: + FixupKind = Hexagon::fixup_Hexagon_6_PCREL_X; + break; + // This is part of an extender, GOT_11 is a + // Word32_U6 unsigned/truncated reloc. 
+ case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = Hexagon::fixup_Hexagon_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X; + break; + } + } else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 0: + FixupKind = getFixupNoBits(MCII, MI, MO, kind); + break; + } + + MCFixup fixup = + MCFixup::create(*Addend, MO.getExpr(), MCFixupKind(FixupKind)); + Fixups.push_back(fixup); + // All of the information is in the fixup. + return (0); +} + unsigned HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO, SmallVectorImpl<MCFixup> &Fixups, @@ -71,18 +508,16 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO, return MCT.getRegisterInfo()->getEncodingValue(MO.getReg()); if (MO.isImm()) return static_cast<unsigned>(MO.getImm()); - llvm_unreachable("Only Immediates and Registers implemented right now"); -} -MCSubtargetInfo const &HexagonMCCodeEmitter::getSubtargetInfo() const { - return MST; + // MO must be an ME. + assert(MO.isExpr()); + return getExprOpValue(MI, MO, MO.getExpr(), Fixups, STI); } MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII, MCRegisterInfo const &MRI, - MCSubtargetInfo const &MST, MCContext &MCT) { - return new HexagonMCCodeEmitter(MII, MST, MCT); + return new HexagonMCCodeEmitter(MII, MCT); } #include "HexagonGenMCCodeEmitter.inc" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h index 96048adf34b7..939380af1013 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -26,16 +26,22 @@ namespace llvm { class HexagonMCCodeEmitter : public MCCodeEmitter { - MCSubtargetInfo const &MST; MCContext &MCT; + MCInstrInfo const &MCII; + std::unique_ptr<unsigned> Addend; + std::unique_ptr<bool> Extended; + + // helper routine for getMachineOpValue() + unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO, + const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; public: - HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCSubtargetInfo const &aMST, - MCContext &aMCT); + HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT); MCSubtargetInfo const &getSubtargetInfo() const; - void EncodeInstruction(MCInst const &MI, raw_ostream &OS, + void encodeInstruction(MCInst const &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, MCSubtargetInfo const &STI) const override; @@ -51,8 +57,8 @@ public: MCSubtargetInfo const &STI) const; private: - HexagonMCCodeEmitter(HexagonMCCodeEmitter const &) LLVM_DELETED_FUNCTION; - void operator=(HexagonMCCodeEmitter const &) LLVM_DELETED_FUNCTION; + HexagonMCCodeEmitter(HexagonMCCodeEmitter const &) = delete; + void operator=(HexagonMCCodeEmitter const &) = delete; }; // class HexagonMCCodeEmitter } // namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp deleted file mode 100644 index d8b9a2567eeb..000000000000 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp +++ /dev/null @@ -1,223 +0,0 @@ -//===- HexagonMCInst.cpp - Hexagon sub-class of MCInst --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This class extends MCInst to allow some Hexagon VLIW annotations. -// -//===----------------------------------------------------------------------===// - -#include "HexagonInstrInfo.h" -#include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCInst.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" - -using namespace llvm; - -std::unique_ptr <MCInstrInfo const> HexagonMCInst::MCII; - -HexagonMCInst::HexagonMCInst() : MCInst() {} -HexagonMCInst::HexagonMCInst(MCInstrDesc const &mcid) : MCInst() {} - -void HexagonMCInst::AppendImplicitOperands(MCInst &MCI) { - MCI.addOperand(MCOperand::CreateImm(0)); - MCI.addOperand(MCOperand::CreateInst(nullptr)); -} - -std::bitset<16> HexagonMCInst::GetImplicitBits(MCInst const &MCI) { - SanityCheckImplicitOperands(MCI); - std::bitset<16> Bits(MCI.getOperand(MCI.getNumOperands() - 2).getImm()); - return Bits; -} - -void HexagonMCInst::SetImplicitBits(MCInst &MCI, std::bitset<16> Bits) { - SanityCheckImplicitOperands(MCI); - MCI.getOperand(MCI.getNumOperands() - 2).setImm(Bits.to_ulong()); -} - -void HexagonMCInst::setPacketBegin(bool f) { - std::bitset<16> Bits(GetImplicitBits(*this)); - Bits.set(packetBeginIndex, f); - SetImplicitBits(*this, Bits); -} - -bool HexagonMCInst::isPacketBegin() const { - std::bitset<16> Bits(GetImplicitBits(*this)); - return Bits.test(packetBeginIndex); -} - -void HexagonMCInst::setPacketEnd(bool f) { - std::bitset<16> Bits(GetImplicitBits(*this)); - Bits.set(packetEndIndex, f); - SetImplicitBits(*this, Bits); -} - -bool HexagonMCInst::isPacketEnd() const { - std::bitset<16> Bits(GetImplicitBits(*this)); - return Bits.test(packetEndIndex); -} - -void HexagonMCInst::resetPacket() { - setPacketBegin(false); - setPacketEnd(false); -} - -// Return the slots used by the insn. -unsigned HexagonMCInst::getUnits(const HexagonTargetMachine *TM) const { - const HexagonInstrInfo *QII = TM->getSubtargetImpl()->getInstrInfo(); - const InstrItineraryData *II = - TM->getSubtargetImpl()->getInstrItineraryData(); - const InstrStage *IS = - II->beginStage(QII->get(this->getOpcode()).getSchedClass()); - - return (IS->getUnits()); -} - -MCInstrDesc const& HexagonMCInst::getDesc() const { return (MCII->get(getOpcode())); } - -// Return the Hexagon ISA class for the insn. -unsigned HexagonMCInst::getType() const { - const uint64_t F = getDesc().TSFlags; - - return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); -} - -// Return whether the insn is an actual insn. -bool HexagonMCInst::isCanon() const { - return (!getDesc().isPseudo() && !isPrefix() && - getType() != HexagonII::TypeENDLOOP); -} - -// Return whether the insn is a prefix. -bool HexagonMCInst::isPrefix() const { - return (getType() == HexagonII::TypePREFIX); -} - -// Return whether the insn is solo, i.e., cannot be in a packet. -bool HexagonMCInst::isSolo() const { - const uint64_t F = getDesc().TSFlags; - return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask); -} - -// Return whether the insn is a new-value consumer. -bool HexagonMCInst::isNewValue() const { - const uint64_t F = getDesc().TSFlags; - return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); -} - -// Return whether the instruction is a legal new-value producer. -bool HexagonMCInst::hasNewValue() const { - const uint64_t F = getDesc().TSFlags; - return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); -} - -// Return the operand that consumes or produces a new value. 
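The deleted HexagonMCInst code above, and its HexagonMCInstrInfo replacement later in this diff, both park the packet markers in a 16-bit flag word carried as an extra immediate operand: bit 0 is packet-begin, bit 1 is packet-end. A self-contained model of that flag storage, with a plain uint64_t standing in for the MCOperand immediate:

#include <bitset>
#include <cassert>
#include <cstdint>

static const size_t packetBeginIndex = 0;
static const size_t packetEndIndex = 1;

// Same round-trip GetImplicitBits/SetImplicitBits perform on the
// second-to-last operand: load into a bitset, flip a bit, store back.
static void setBit(uint64_t &Word, size_t Idx, bool V) {
  std::bitset<16> Bits(Word);
  Bits.set(Idx, V);
  Word = Bits.to_ulong();
}

static bool testBit(uint64_t Word, size_t Idx) {
  return std::bitset<16>(Word).test(Idx);
}

int main() {
  uint64_t Flags = 0;                  // fresh instruction: no markers set
  setBit(Flags, packetEndIndex, true); // this insn closes its packet
  assert(!testBit(Flags, packetBeginIndex));
  assert(testBit(Flags, packetEndIndex));
  return 0;
}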
-const MCOperand &HexagonMCInst::getNewValue() const { - const uint64_t F = getDesc().TSFlags; - const unsigned O = - (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask; - const MCOperand &MCO = getOperand(O); - - assert((isNewValue() || hasNewValue()) && MCO.isReg()); - return (MCO); -} - -// Return whether the instruction needs to be constant extended. -// 1) Always return true if the instruction has 'isExtended' flag set. -// -// isExtendable: -// 2) For immediate extended operands, return true only if the value is -// out-of-range. -// 3) For global address, always return true. - -bool HexagonMCInst::isConstExtended(void) const { - if (isExtended()) - return true; - - if (!isExtendable()) - return false; - - short ExtOpNum = getCExtOpNum(); - int MinValue = getMinValue(); - int MaxValue = getMaxValue(); - const MCOperand &MO = getOperand(ExtOpNum); - - // We could be using an instruction with an extendable immediate and shoehorn - // a global address into it. If it is a global address it will be constant - // extended. We do this for COMBINE. - // We currently only handle isGlobal() because it is the only kind of - // object we are going to end up with here for now. - // In the future we probably should add isSymbol(), etc. - if (MO.isExpr()) - return true; - - // If the extendable operand is not 'Immediate' type, the instruction should - // have 'isExtended' flag set. - assert(MO.isImm() && "Extendable operand must be Immediate type"); - - int ImmValue = MO.getImm(); - return (ImmValue < MinValue || ImmValue > MaxValue); -} - -// Return whether the instruction must be always extended. -bool HexagonMCInst::isExtended(void) const { - const uint64_t F = getDesc().TSFlags; - return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; -} - -// Return true if the instruction may be extended based on the operand value. -bool HexagonMCInst::isExtendable(void) const { - const uint64_t F = getDesc().TSFlags; - return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; -} - -// Return number of bits in the constant extended operand. -unsigned HexagonMCInst::getBitCount(void) const { - const uint64_t F = getDesc().TSFlags; - return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); -} - -// Return constant extended operand number. -unsigned short HexagonMCInst::getCExtOpNum(void) const { - const uint64_t F = getDesc().TSFlags; - return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); -} - -// Return whether the operand can be constant extended. -bool HexagonMCInst::isOperandExtended(const unsigned short OperandNum) const { - const uint64_t F = getDesc().TSFlags; - return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) == - OperandNum; -} - -// Return the min value that a constant extendable operand can have -// without being extended. -int HexagonMCInst::getMinValue(void) const { - const uint64_t F = getDesc().TSFlags; - unsigned isSigned = - (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; - unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; - - if (isSigned) // if value is signed - return -1U << (bits - 1); - else - return 0; -} - -// Return the max value that a constant extendable operand can have -// without being extended. 
-int HexagonMCInst::getMaxValue(void) const { - const uint64_t F = getDesc().TSFlags; - unsigned isSigned = - (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; - unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; - - if (isSigned) // if value is signed - return ~(-1U << (bits - 1)); - else - return ~(-1U << bits); -} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h deleted file mode 100644 index ce9a8db5ac44..000000000000 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h +++ /dev/null @@ -1,108 +0,0 @@ -//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class extends MCInst to allow some VLIW annotations. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINST_H -#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINST_H - -#include "HexagonTargetMachine.h" -#include "llvm/MC/MCInst.h" -#include <memory> - -extern "C" void LLVMInitializeHexagonTargetMC(); -namespace llvm { -class MCOperand; - -class HexagonMCInst : public MCInst { - friend void ::LLVMInitializeHexagonTargetMC(); - // Used to access TSFlags - static std::unique_ptr <MCInstrInfo const> MCII; - -public: - explicit HexagonMCInst(); - HexagonMCInst(const MCInstrDesc &mcid); - - static void AppendImplicitOperands(MCInst &MCI); - static std::bitset<16> GetImplicitBits(MCInst const &MCI); - static void SetImplicitBits(MCInst &MCI, std::bitset<16> Bits); - static void SanityCheckImplicitOperands(MCInst const &MCI) { - assert(MCI.getNumOperands() >= 2 && "At least the two implicit operands"); - assert(MCI.getOperand(MCI.getNumOperands() - 1).isInst() && - "Implicit bits and flags"); - assert(MCI.getOperand(MCI.getNumOperands() - 2).isImm() && - "Parent pointer"); - } - - void setPacketBegin(bool Y); - bool isPacketBegin() const; - static const size_t packetBeginIndex = 0; - void setPacketEnd(bool Y); - bool isPacketEnd() const; - static const size_t packetEndIndex = 1; - void resetPacket(); - - // Return the slots used by the insn. - unsigned getUnits(const HexagonTargetMachine *TM) const; - - // Return the Hexagon ISA class for the insn. - unsigned getType() const; - - MCInstrDesc const &getDesc() const; - - // Return whether the insn is an actual insn. - bool isCanon() const; - - // Return whether the insn is a prefix. - bool isPrefix() const; - - // Return whether the insn is solo, i.e., cannot be in a packet. - bool isSolo() const; - - // Return whether the instruction needs to be constant extended. - bool isConstExtended() const; - - // Return constant extended operand number. - unsigned short getCExtOpNum(void) const; - - // Return whether the insn is a new-value consumer. - bool isNewValue() const; - - // Return whether the instruction is a legal new-value producer. - bool hasNewValue() const; - - // Return the operand that consumes or produces a new value. - const MCOperand &getNewValue() const; - - // Return number of bits in the constant extended operand. - unsigned getBitCount(void) const; - -private: - // Return whether the instruction must be always extended. 
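getMinValue/getMaxValue above derive the in-range span of an extendable operand from two TSFlags fields: the field width and a signedness bit. A standalone sketch of the same arithmetic, written with int64_t shifts instead of the -1U idiom so the intent is visible:

#include <cassert>
#include <cstdint>

// Range an immediate field can represent without a constant extender.
static int64_t minValue(unsigned bits, bool isSigned) {
  return isSigned ? -(int64_t(1) << (bits - 1)) : 0;
}

static int64_t maxValue(unsigned bits, bool isSigned) {
  return isSigned ? (int64_t(1) << (bits - 1)) - 1
                  : (int64_t(1) << bits) - 1;
}

int main() {
  // An 8-bit signed field spans [-128, 127]; anything outside forces
  // the instruction to be constant-extended.
  assert(minValue(8, true) == -128 && maxValue(8, true) == 127);
  assert(minValue(8, false) == 0 && maxValue(8, false) == 255);
  return 0;
}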
- bool isExtended() const; - - // Return true if the insn may be extended based on the operand value. - bool isExtendable() const; - - // Return true if the operand can be constant extended. - bool isOperandExtended(const unsigned short OperandNum) const; - - // Return the min value that a constant extendable operand can have - // without being extended. - int getMinValue() const; - - // Return the max value that a constant extendable operand can have - // without being extended. - int getMaxValue() const; -}; -} - -#endif diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp new file mode 100644 index 000000000000..93c7a0d98bf2 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -0,0 +1,248 @@ +//===- HexagonMCInstrInfo.cpp - Hexagon sub-class of MCInst ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class extends MCInstrInfo to allow Hexagon specific MCInstr queries +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCInstrInfo.h" +#include "HexagonBaseInfo.h" + +namespace llvm { +void HexagonMCInstrInfo::AppendImplicitOperands(MCInst &MCI) { + MCI.addOperand(MCOperand::createImm(0)); + MCI.addOperand(MCOperand::createInst(nullptr)); +} + +HexagonII::MemAccessSize +HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + + return (HexagonII::MemAccessSize((F >> HexagonII::MemAccessSizePos) & + HexagonII::MemAccesSizeMask)); +} + +unsigned HexagonMCInstrInfo::getBitCount(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); +} + +// Return constant extended operand number. +unsigned short HexagonMCInstrInfo::getCExtOpNum(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); +} + +MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII, + MCInst const &MCI) { + return (MCII.get(MCI.getOpcode())); +} + +unsigned HexagonMCInstrInfo::getExtentAlignment(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtentAlignPos) & HexagonII::ExtentAlignMask); +} + +unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); +} + +std::bitset<16> HexagonMCInstrInfo::GetImplicitBits(MCInst const &MCI) { + SanityCheckImplicitOperands(MCI); + std::bitset<16> Bits(MCI.getOperand(MCI.getNumOperands() - 2).getImm()); + return Bits; +} + +// Return the max value that a constant extendable operand can have +// without being extended. 
+int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + unsigned isSigned = + (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return ~(-1U << (bits - 1)); + else + return ~(-1U << bits); +} + +// Return the min value that a constant extendable operand can have +// without being extended. +int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + unsigned isSigned = + (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return -1U << (bits - 1); + else + return 0; +} + +char const *HexagonMCInstrInfo::getName(MCInstrInfo const &MCII, + MCInst const &MCI) { + return MCII.getName(MCI.getOpcode()); +} + +// Return the operand that consumes or produces a new value. +MCOperand const &HexagonMCInstrInfo::getNewValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + unsigned const O = + (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask; + MCOperand const &MCO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) || + HexagonMCInstrInfo::hasNewValue(MCII, MCI)) && + MCO.isReg()); + return (MCO); +} + +// Return the Hexagon ISA class for the insn. +unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + + return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); +} + +// Return whether the instruction is a legal new-value producer. +bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); +} + +// Return whether the insn is an actual insn. +bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) { + return (!HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() && + !HexagonMCInstrInfo::isPrefix(MCII, MCI) && + HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP); +} + +// Return whether the instruction needs to be constant extended. +// 1) Always return true if the instruction has 'isExtended' flag set. +// +// isExtendable: +// 2) For immediate extended operands, return true only if the value is +// out-of-range. +// 3) For global address, always return true. + +bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, + MCInst const &MCI) { + if (HexagonMCInstrInfo::isExtended(MCII, MCI)) + return true; + + if (!HexagonMCInstrInfo::isExtendable(MCII, MCI)) + return false; + + short ExtOpNum = HexagonMCInstrInfo::getCExtOpNum(MCII, MCI); + int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI); + int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI); + MCOperand const &MO = MCI.getOperand(ExtOpNum); + + // We could be using an instruction with an extendable immediate and shoehorn + // a global address into it. If it is a global address it will be constant + // extended. We do this for COMBINE. + // We currently only handle isGlobal() because it is the only kind of + // object we are going to end up with here for now. 
+ // In the future we probably should add isSymbol(), etc. + if (MO.isExpr()) + return true; + + // If the extendable operand is not 'Immediate' type, the instruction should + // have 'isExtended' flag set. + assert(MO.isImm() && "Extendable operand must be Immediate type"); + + int ImmValue = MO.getImm(); + return (ImmValue < MinValue || ImmValue > MaxValue); +} + +// Return true if the instruction may be extended based on the operand value. +bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; +} + +// Return whether the instruction must be always extended. +bool HexagonMCInstrInfo::isExtended(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; +} + +// Return whether the insn is a new-value consumer. +bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + +// Return whether the operand can be constant extended. +bool HexagonMCInstrInfo::isOperandExtended(MCInstrInfo const &MCII, + MCInst const &MCI, + unsigned short OperandNum) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) == + OperandNum; +} + +bool HexagonMCInstrInfo::isPacketBegin(MCInst const &MCI) { + std::bitset<16> Bits(GetImplicitBits(MCI)); + return Bits.test(packetBeginIndex); +} + +bool HexagonMCInstrInfo::isPacketEnd(MCInst const &MCI) { + std::bitset<16> Bits(GetImplicitBits(MCI)); + return Bits.test(packetEndIndex); +} + +// Return whether the insn is a prefix. +bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) { + return (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypePREFIX); +} + +// Return whether the insn is solo, i.e., cannot be in a packet. +bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask); +} + +void HexagonMCInstrInfo::resetPacket(MCInst &MCI) { + setPacketBegin(MCI, false); + setPacketEnd(MCI, false); +} + +void HexagonMCInstrInfo::SetImplicitBits(MCInst &MCI, std::bitset<16> Bits) { + SanityCheckImplicitOperands(MCI); + MCI.getOperand(MCI.getNumOperands() - 2).setImm(Bits.to_ulong()); +} + +void HexagonMCInstrInfo::setPacketBegin(MCInst &MCI, bool f) { + std::bitset<16> Bits(GetImplicitBits(MCI)); + Bits.set(packetBeginIndex, f); + SetImplicitBits(MCI, Bits); +} + +void HexagonMCInstrInfo::setPacketEnd(MCInst &MCI, bool f) { + std::bitset<16> Bits(GetImplicitBits(MCI)); + Bits.set(packetEndIndex, f); + SetImplicitBits(MCI, Bits); +} +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h new file mode 100644 index 000000000000..082c80d5ac05 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -0,0 +1,122 @@ +//===- HexagonMCInstrInfo.cpp - Hexagon sub-class of MCInst ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility functions for Hexagon specific MCInst queries
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+
+#include <bitset>
+
+namespace llvm {
+class MCInstrDesc;
+class MCInstrInfo;
+class MCInst;
+class MCOperand;
+namespace HexagonII {
+enum class MemAccessSize;
+}
+namespace HexagonMCInstrInfo {
+void AppendImplicitOperands(MCInst &MCI);
+
+// Return memory access size
+HexagonII::MemAccessSize getAccessSize(MCInstrInfo const &MCII,
+                                       MCInst const &MCI);
+
+// Return number of bits in the constant extended operand.
+unsigned getBitCount(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return constant extended operand number.
+unsigned short getCExtOpNum(MCInstrInfo const &MCII, MCInst const &MCI);
+
+MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the implicit alignment of the extendable operand
+unsigned getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the number of logical bits of the extendable operand
+unsigned getExtentBits(MCInstrInfo const &MCII, MCInst const &MCI);
+
+std::bitset<16> GetImplicitBits(MCInst const &MCI);
+
+// Return the max value that a constant extendable operand can have
+// without being extended.
+int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the min value that a constant extendable operand can have
+// without being extended.
+int getMinValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return instruction name
+char const *getName(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the operand that consumes or produces a new value.
+MCOperand const &getNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the Hexagon ISA class for the insn.
+unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction is a legal new-value producer.
+bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is an actual insn.
+bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction needs to be constant extended.
+bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return true if the insn may be extended based on the operand value.
+bool isExtendable(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction must be always extended.
+bool isExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is a new-value consumer.
+bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return true if the operand can be constant extended.
+bool isOperandExtended(MCInstrInfo const &MCII, MCInst const &MCI,
+                       unsigned short OperandNum);
+
+bool isPacketBegin(MCInst const &MCI);
+
+bool isPacketEnd(MCInst const &MCI);
+
+// Return whether the insn is a prefix.
+bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is solo, i.e., cannot be in a packet.
+bool isSolo(MCInstrInfo const &MCII, MCInst const &MCI);
+
+static const size_t packetBeginIndex = 0;
+static const size_t packetEndIndex = 1;
+
+void resetPacket(MCInst &MCI);
+
+inline void SanityCheckImplicitOperands(MCInst const &MCI) {
+  assert(MCI.getNumOperands() >= 2 && "At least the two implicit operands");
+  assert(MCI.getOperand(MCI.getNumOperands() - 1).isInst() &&
+         "Implicit bits and flags");
+  assert(MCI.getOperand(MCI.getNumOperands() - 2).isImm() && "Parent pointer");
+}
+
+void SetImplicitBits(MCInst &MCI, std::bitset<16> Bits);
+
+void setPacketBegin(MCInst &MCI, bool Y);
+
+void setPacketEnd(MCInst &MCI, bool Y);
+}
+}
+
+#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index ae5a22bdb01b..59395e230fa9 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -14,7 +14,6 @@
 #include "HexagonMCTargetDesc.h"
 #include "HexagonMCAsmInfo.h"
 #include "MCTargetDesc/HexagonInstPrinter.h"
-#include "MCTargetDesc/HexagonMCInst.h"
 #include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCELFStreamer.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -36,7 +35,7 @@ using namespace llvm;
 #define GET_REGINFO_MC_DESC
 #include "HexagonGenRegisterInfo.inc"
 
-static MCInstrInfo *createHexagonMCInstrInfo() {
+MCInstrInfo *llvm::createHexagonMCInstrInfo() {
   MCInstrInfo *X = new MCInstrInfo();
   InitHexagonMCInstrInfo(X);
   return X;
@@ -48,15 +47,6 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
   return X;
 }
 
-static MCStreamer *
-createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
-                         raw_ostream &OS, MCCodeEmitter *CE,
-                         bool RelaxAll) {
-  MCELFStreamer *ES = new MCELFStreamer(Context, MAB, OS, CE);
-  return ES;
-}
-
-
 static MCSubtargetInfo *
 createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
   MCSubtargetInfo *X = new MCSubtargetInfo();
@@ -76,32 +66,25 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
   return MAI;
 }
 
-static MCStreamer *createMCStreamer(Target const &T, StringRef TT,
-                                    MCContext &Context, MCAsmBackend &MAB,
-                                    raw_ostream &OS, MCCodeEmitter *Emitter,
-                                    MCSubtargetInfo const &STI, bool RelaxAll) {
-  MCStreamer *ES = createHexagonELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
-  new MCTargetStreamer(*ES);
-  return ES;
-}
-
-
 static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                                  CodeModel::Model CM,
                                                  CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
   // For the time being, use static relocations, since there's really no
   // support for PIC yet.
-  X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
+  X->initMCCodeGenInfo(Reloc::Static, CM, OL);
   return X;
 }
 
-static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
+
+static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T,
                                                  unsigned SyntaxVariant,
                                                  const MCAsmInfo &MAI,
                                                  const MCInstrInfo &MII,
-                                                 const MCRegisterInfo &MRI,
-                                                 const MCSubtargetInfo &STI) {
-  return new HexagonInstPrinter(MAI, MII, MRI);
+                                                 const MCRegisterInfo &MRI) {
+  if (SyntaxVariant == 0)
+    return(new HexagonInstPrinter(MAI, MII, MRI));
+  else
+    return nullptr;
 }
 
 // Force static initialization.
@@ -116,7 +99,6 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
   // Register the MC instruction info.
   TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget,
                                       createHexagonMCInstrInfo);
-  HexagonMCInst::MCII.reset (createHexagonMCInstrInfo());
 
   // Register the MC register info.
   TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
@@ -137,7 +119,4 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
   // Register the asm backend
   TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget,
                                        createHexagonAsmBackend);
-
-  // Register the obj streamer
-  TargetRegistry::RegisterMCObjectStreamer(TheHexagonTarget, createMCStreamer);
 }
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 02fd5161d24a..de63fd271aea 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -27,20 +27,22 @@ class MCSubtargetInfo;
 class Target;
 class StringRef;
 class raw_ostream;
+class raw_pwrite_stream;
 
 extern Target TheHexagonTarget;
 
+MCInstrInfo *createHexagonMCInstrInfo();
+
 MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
                                           MCRegisterInfo const &MRI,
-                                          MCSubtargetInfo const &MST,
                                           MCContext &MCT);
 
 MCAsmBackend *createHexagonAsmBackend(Target const &T,
                                       MCRegisterInfo const &MRI, StringRef TT,
                                       StringRef CPU);
 
-MCObjectWriter *createHexagonELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
-                                             StringRef CPU);
+MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
+                                             uint8_t OSABI, StringRef CPU);
 
 } // End llvm namespace
diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile
index 329c9d3018f0..04b41e5986ac 100644
--- a/lib/Target/Hexagon/Makefile
+++ b/lib/Target/Hexagon/Makefile
@@ -14,12 +14,12 @@ TARGET = Hexagon
 BUILT_SOURCES = HexagonGenRegisterInfo.inc \
 HexagonGenInstrInfo.inc \
 HexagonGenAsmWriter.inc \
- HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
- HexagonGenCallingConv.inc \
- HexagonGenDFAPacketizer.inc \
- HexagonGenMCCodeEmitter.inc \
- HexagonGenDisassemblerTables.inc
-
-DIRS = TargetInfo MCTargetDesc Disassembler
-
-include $(LEVEL)/Makefile.common
+ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
+ HexagonGenCallingConv.inc \
+ HexagonGenDFAPacketizer.inc \
+ HexagonGenMCCodeEmitter.inc \
+ HexagonGenDisassemblerTables.inc
+
+DIRS = TargetInfo MCTargetDesc Disassembler
+
+include $(LEVEL)/Makefile.common
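
Three notes follow on the HexagonMCInstrInfo helpers imported above, each with a small standalone C++ sketch. The sketches are editorial illustrations only: they are not part of the imported sources, and every name in them (fieldMin, fieldMax, getField, and so on) is made up for the example.

First, getMinValue and getMaxValue compute the inclusive range an extendable immediate can hold before a constant extender is required: a signed N-bit field covers [-2^(N-1), 2^(N-1)-1], an unsigned one [0, 2^N-1]. The sketch below mirrors that arithmetic with explicit 64-bit shifts rather than the ~(-1U << bits) idiom, and checks the 8-bit case; isConstExtended then treats any immediate outside this range (or any expression operand, such as a global address) as needing extension.

#include <cassert>
#include <cstdint>

// Inclusive range of an N-bit immediate field (illustrative names, not
// the LLVM API). Valid for 0 < bits < 63.
static std::int64_t fieldMax(unsigned bits, bool isSigned) {
  return isSigned ? (std::int64_t(1) << (bits - 1)) - 1 // 8 bits -> 127
                  : (std::int64_t(1) << bits) - 1;      // 8 bits -> 255
}

static std::int64_t fieldMin(unsigned bits, bool isSigned) {
  return isSigned ? -(std::int64_t(1) << (bits - 1))    // 8 bits -> -128
                  : 0;
}

int main() {
  // A signed 8-bit extendable operand covers [-128, 127]; an immediate
  // outside that range would need a constant extender.
  assert(fieldMin(8, true) == -128 && fieldMax(8, true) == 127);
  assert(fieldMin(8, false) == 0 && fieldMax(8, false) == 255);
  return 0;
}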
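
Second, nearly every query above follows one pattern: fetch the instruction's TSFlags word from its MCInstrDesc and extract a bit-field with (F >> Pos) & Mask. A minimal model of that encoding follows; the positions and masks here are hypothetical, and the real layout lives in HexagonBaseInfo.h.

#include <cassert>
#include <cstdint>

// Hypothetical TSFlags layout in the spirit of HexagonII (illustrative
// only; the real positions and masks differ).
enum : unsigned {
  ExtendablePos = 0,   ExtendableMask = 0x1,
  ExtentBitsPos = 1,   ExtentBitsMask = 0x3f,
  ExtentSignedPos = 7, ExtentSignedMask = 0x1,
};

// The same shift-and-mask idiom used by the helpers above.
static unsigned getField(std::uint64_t F, unsigned Pos, unsigned Mask) {
  return (F >> Pos) & Mask;
}

int main() {
  // Pack a flags word: extendable, 8 extent bits, signed.
  std::uint64_t F = (1ULL << ExtendablePos) | (8ULL << ExtentBitsPos) |
                    (1ULL << ExtentSignedPos);
  assert(getField(F, ExtendablePos, ExtendableMask) == 1);
  assert(getField(F, ExtentBitsPos, ExtentBitsMask) == 8);
  assert(getField(F, ExtentSignedPos, ExtentSignedMask) == 1);
  return 0;
}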
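
Finally, the packet begin/end markers are not fields of the instruction proper: they live in a 16-bit "implicit bits" immediate kept as the second-to-last MCInst operand, with an instruction operand last (SanityCheckImplicitOperands asserts exactly that layout). Below is a standalone round-trip of the bitset scheme, assuming only the bit indices declared in the header; a plain integer stands in for the implicit immediate operand.

#include <bitset>
#include <cassert>
#include <cstddef>
#include <cstdint>

// Same indices as packetBeginIndex/packetEndIndex in the header above.
static const std::size_t packetBeginIndex = 0;
static const std::size_t packetEndIndex = 1;

int main() {
  std::uint64_t Imm = 0; // stand-in for the implicit immediate operand

  // setPacketBegin(MCI, true): read the word, flip one bit, write it back.
  std::bitset<16> Bits(Imm);
  Bits.set(packetBeginIndex, true);
  Imm = Bits.to_ulong();

  // isPacketBegin / isPacketEnd: re-read the word and test a bit.
  std::bitset<16> Check(Imm);
  assert(Check.test(packetBeginIndex));
  assert(!Check.test(packetEndIndex));
  return 0;
}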